You're Invited: Meet the Socket Team at BlackHat and DEF CON in Las Vegas, Aug 4-6. RSVP
Socket
Book a Demo | Install | Sign in
Socket

ocr-click-plugin

Package Overview
Dependencies
Maintainers
1
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

ocr-click-plugin - npm Package Compare versions

Comparing version

to
2.1.7

133

dist/index.js

@@ -16,5 +16,4 @@ "use strict";

exports.OCRClickPlugin = void 0;
const plugin_1 = require("appium/plugin");
const tesseract_js_1 = require("tesseract.js");
const path_1 = __importDefault(require("path"));
const base_plugin_1 = require("@appium/base-plugin");
const tesseract_js_1 = __importDefault(require("tesseract.js"));
// Try to import Sharp, but handle gracefully if not available

@@ -29,4 +28,5 @@ let sharp = null;

}
// cache trained data in the build dir
const CACHE_PATH = path_1.default.resolve(__dirname);
const SOURCE_URL_REGEX = new RegExp('/session/[^/]+/appium/plugin/textclick');
const CHECK_TEXT_URL_REGEX = new RegExp('/session/[^/]+/appium/plugin/checktext');
const EXECUTE_URL_REGEX = new RegExp('/session/[^/]+/execute');
// Tesseract configuration for better accuracy

@@ -36,3 +36,3 @@ const TESSERACT_CONFIG = {

tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-_@#$%^&*()', // Limit recognized characters
tessedit_pageseg_mode: tesseract_js_1.PSM.SINGLE_BLOCK, // Assume uniform text block
tessedit_pageseg_mode: '6', // Assume uniform text block
tessedit_do_invert: '0',

@@ -46,10 +46,10 @@ preserve_interword_spaces: '1',

const MIN_CONFIDENCE_THRESHOLD = 60;
class OCRClickPlugin extends plugin_1.BasePlugin {
constructor(name) {
super(name);
this.isWorkerReady = false;
}
class OCRClickPlugin extends base_plugin_1.BasePlugin {
shouldAvoidProxy(method, route, body) {
// Handle plugin routes
if (SOURCE_URL_REGEX.test(route) || CHECK_TEXT_URL_REGEX.test(route)) {
return true;
}
// Handle execute commands for mobile: textclick and mobile: checktext
if (route.includes('/execute') && (body === null || body === void 0 ? void 0 : body.script)) {
if (EXECUTE_URL_REGEX.test(route) && (body === null || body === void 0 ? void 0 : body.script)) {
const script = body.script;

@@ -96,42 +96,23 @@ if (script === 'mobile: textclick' || script === 'mobile: checktext') {

}
readyWorker(driver) {
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c;
if (this.isWorkerReady && this.worker) {
return;
}
this.worker = yield (0, tesseract_js_1.createWorker)({
logger: (x) => this.logger.debug(JSON.stringify(x)),
cachePath: CACHE_PATH,
});
const lang = ((_c = (_b = (_a = driver.settings) === null || _a === void 0 ? void 0 : _a.getSettings) === null || _b === void 0 ? void 0 : _b.call(_a)) === null || _c === void 0 ? void 0 : _c.ocrLanguage) || 'eng';
yield this.worker.loadLanguage(lang);
yield this.worker.initialize(lang);
yield this.worker.setParameters(TESSERACT_CONFIG);
this.isWorkerReady = true;
});
}
findAndClickText(next_1, driver_1, text_1) {
return __awaiter(this, arguments, void 0, function* (next, driver, text, index = 0) {
try {
if (!driver.getScreenshot) {
throw new Error('This driver does not support screenshot functionality');
}
if (!driver.getScreenshot)
return;
// Step 1: Capture screenshot
const screenshotBase64 = yield driver.getScreenshot();
this.logger.info('Enhancing screenshot for better OCR results...');
console.log('Enhancing screenshot for better OCR results...');
// Step 2: Enhance the screenshot (if Sharp is available)
const enhancedBase64Image = yield this.enhanceScreenshot(screenshotBase64);
// Step 3: Ensure worker is ready
yield this.readyWorker(driver);
if (!this.worker) {
throw new Error('OCR worker was not initialized');
}
// Step 4: Process the enhanced screenshot with OCR
this.logger.info('Processing enhanced screenshot with OCR...');
const result = yield this.worker.recognize(Buffer.from(enhancedBase64Image, 'base64'));
// Step 3: Process the enhanced screenshot with OCR
console.log('Processing enhanced screenshot with OCR...');
const result = yield tesseract_js_1.default.recognize(Buffer.from(enhancedBase64Image, 'base64'), 'eng', Object.assign(Object.assign({}, TESSERACT_CONFIG), { logger: (m) => {
if (m.status === 'recognizing text') {
console.log(`OCR Progress: ${Math.round(m.progress * 100)}%`);
}
} }));
// Filter words by confidence threshold
const words = result.data.words.filter(word => word.confidence >= MIN_CONFIDENCE_THRESHOLD);
this.logger.info('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence })));
// Step 5: Find all matches for the given text
console.log('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence })));
// Step 4: Find all matches for the given text
const matchingWords = words.filter(word => {

@@ -148,3 +129,3 @@ const normalizedWord = word.text.toLowerCase().trim();

}
// Step 6: Get the desired match based on index
// Step 5: Get the desired match based on index
const targetWord = matchingWords[index];

@@ -154,7 +135,6 @@ const { x0, y0, x1, y1 } = targetWord.bbox;

const centerY = (y0 + y1) / 2;
this.logger.info(`Text "${targetWord.text}" found at coordinates: (${centerX}, ${centerY}) with confidence: ${targetWord.confidence}%`);
if (!driver.performActions) {
throw new Error('This driver does not support performActions');
}
// Step 7: Perform the click action
console.log(`Text "${targetWord.text}" found at coordinates: (${centerX}, ${centerY}) with confidence: ${targetWord.confidence}%`);
if (!driver.performActions)
return;
// Step 6: Perform the click action
yield driver.performActions([

@@ -172,3 +152,3 @@ {

]);
this.logger.info(`Successfully clicked on text "${text}" at index ${index}`);
console.log(`Successfully clicked on text "${text}" at index ${index}`);
return {

@@ -183,3 +163,3 @@ success: true,

catch (err) {
this.logger.error('Error in findAndClickText:', err);
console.error('Error in findAndClickText:', err);
throw err;

@@ -192,22 +172,20 @@ }

try {
if (!driver.getScreenshot) {
throw new Error('This driver does not support screenshot functionality');
}
if (!driver.getScreenshot)
return;
// Step 1: Capture screenshot
const screenshotBase64 = yield driver.getScreenshot();
this.logger.info('Enhancing screenshot for OCR text detection...');
console.log('Enhancing screenshot for OCR text detection...');
// Step 2: Enhance the screenshot (if Sharp is available)
const enhancedBase64Image = yield this.enhanceScreenshot(screenshotBase64);
// Step 3: Ensure worker is ready
yield this.readyWorker(driver);
if (!this.worker) {
throw new Error('OCR worker was not initialized');
}
// Step 4: Process the enhanced screenshot with OCR
this.logger.info('Processing enhanced screenshot with OCR...');
const result = yield this.worker.recognize(Buffer.from(enhancedBase64Image, 'base64'));
// Step 3: Process the enhanced screenshot with OCR
console.log('Processing enhanced screenshot with OCR...');
const result = yield tesseract_js_1.default.recognize(Buffer.from(enhancedBase64Image, 'base64'), 'eng', Object.assign(Object.assign({}, TESSERACT_CONFIG), { logger: (m) => {
if (m.status === 'recognizing text') {
console.log(`OCR Progress: ${Math.round(m.progress * 100)}%`);
}
} }));
// Filter words by confidence threshold
const words = result.data.words.filter(word => word.confidence >= MIN_CONFIDENCE_THRESHOLD);
this.logger.info('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence })));
// Step 5: Find all matches for the given text
console.log('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence })));
// Step 4: Find all matches for the given text
const matchingWords = words.filter(word => {

@@ -229,3 +207,3 @@ const normalizedWord = word.text.toLowerCase().trim();

}));
this.logger.info(`Text "${text}" detection result: ${isPresent ? 'FOUND' : 'NOT FOUND'} (${matchingWords.length} matches)`);
console.log(`Text "${text}" detection result: ${isPresent ? 'FOUND' : 'NOT FOUND'} (${matchingWords.length} matches)`);
return {

@@ -244,3 +222,3 @@ success: true,

catch (err) {
this.logger.error('Error in checkTextPresent:', err);
console.error('Error in checkTextPresent:', err);
const errorMessage = err instanceof Error ? err.message : String(err);

@@ -264,3 +242,3 @@ return {

if (!sharp) {
this.logger.info('Sharp not available - using original image without enhancement');
console.log('Sharp not available - using original image without enhancement');
return base64Image;

@@ -288,3 +266,3 @@ }

catch (err) {
this.logger.error('Error enhancing screenshot with Sharp, using original:', err);
console.error('Error enhancing screenshot with Sharp, using original:', err);
return base64Image; // Fallback to original image

@@ -294,18 +272,5 @@ }

}
deleteSession(next) {
return __awaiter(this, void 0, void 0, function* () {
try {
if (this.worker) {
yield this.worker.terminate();
this.worker = undefined;
this.isWorkerReady = false;
}
}
finally {
yield next();
}
});
}
}
exports.OCRClickPlugin = OCRClickPlugin;
// Define a new method map for Appium commands
OCRClickPlugin.newMethodMap = {

@@ -319,3 +284,2 @@ '/session/:sessionId/appium/plugin/textclick': {

},
neverProxy: true,
},

@@ -330,5 +294,4 @@ },

},
neverProxy: true,
},
},
};
{
"name": "ocr-click-plugin",
"version": "2.1.6",
"version": "2.1.7",
"description": "An Appium plugin that uses OCR (Optical Character Recognition) to find and click text elements on mobile device screens",

@@ -5,0 +5,0 @@ "main": "dist/index.js",