ocr-click-plugin
Advanced tools
Comparing version
@@ -16,5 +16,4 @@ "use strict"; | ||
exports.OCRClickPlugin = void 0; | ||
const plugin_1 = require("appium/plugin"); | ||
const tesseract_js_1 = require("tesseract.js"); | ||
const path_1 = __importDefault(require("path")); | ||
const base_plugin_1 = require("@appium/base-plugin"); | ||
const tesseract_js_1 = __importDefault(require("tesseract.js")); | ||
// Try to import Sharp, but handle gracefully if not available | ||
@@ -29,4 +28,5 @@ let sharp = null; | ||
} | ||
// cache trained data in the build dir | ||
const CACHE_PATH = path_1.default.resolve(__dirname); | ||
const SOURCE_URL_REGEX = new RegExp('/session/[^/]+/appium/plugin/textclick'); | ||
const CHECK_TEXT_URL_REGEX = new RegExp('/session/[^/]+/appium/plugin/checktext'); | ||
const EXECUTE_URL_REGEX = new RegExp('/session/[^/]+/execute'); | ||
// Tesseract configuration for better accuracy | ||
@@ -36,3 +36,3 @@ const TESSERACT_CONFIG = { | ||
tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-_@#$%^&*()', // Limit recognized characters | ||
tessedit_pageseg_mode: tesseract_js_1.PSM.SINGLE_BLOCK, // Assume uniform text block | ||
tessedit_pageseg_mode: '6', // Assume uniform text block | ||
tessedit_do_invert: '0', | ||
@@ -46,10 +46,10 @@ preserve_interword_spaces: '1', | ||
const MIN_CONFIDENCE_THRESHOLD = 60; | ||
class OCRClickPlugin extends plugin_1.BasePlugin { | ||
constructor(name) { | ||
super(name); | ||
this.isWorkerReady = false; | ||
} | ||
class OCRClickPlugin extends base_plugin_1.BasePlugin { | ||
shouldAvoidProxy(method, route, body) { | ||
// Handle plugin routes | ||
if (SOURCE_URL_REGEX.test(route) || CHECK_TEXT_URL_REGEX.test(route)) { | ||
return true; | ||
} | ||
// Handle execute commands for mobile: textclick and mobile: checktext | ||
if (route.includes('/execute') && (body === null || body === void 0 ? void 0 : body.script)) { | ||
if (EXECUTE_URL_REGEX.test(route) && (body === null || body === void 0 ? void 0 : body.script)) { | ||
const script = body.script; | ||
@@ -96,42 +96,23 @@ if (script === 'mobile: textclick' || script === 'mobile: checktext') { | ||
} | ||
readyWorker(driver) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
var _a, _b, _c; | ||
if (this.isWorkerReady && this.worker) { | ||
return; | ||
} | ||
this.worker = yield (0, tesseract_js_1.createWorker)({ | ||
logger: (x) => this.logger.debug(JSON.stringify(x)), | ||
cachePath: CACHE_PATH, | ||
}); | ||
const lang = ((_c = (_b = (_a = driver.settings) === null || _a === void 0 ? void 0 : _a.getSettings) === null || _b === void 0 ? void 0 : _b.call(_a)) === null || _c === void 0 ? void 0 : _c.ocrLanguage) || 'eng'; | ||
yield this.worker.loadLanguage(lang); | ||
yield this.worker.initialize(lang); | ||
yield this.worker.setParameters(TESSERACT_CONFIG); | ||
this.isWorkerReady = true; | ||
}); | ||
} | ||
findAndClickText(next_1, driver_1, text_1) { | ||
return __awaiter(this, arguments, void 0, function* (next, driver, text, index = 0) { | ||
try { | ||
if (!driver.getScreenshot) { | ||
throw new Error('This driver does not support screenshot functionality'); | ||
} | ||
if (!driver.getScreenshot) | ||
return; | ||
// Step 1: Capture screenshot | ||
const screenshotBase64 = yield driver.getScreenshot(); | ||
this.logger.info('Enhancing screenshot for better OCR results...'); | ||
console.log('Enhancing screenshot for better OCR results...'); | ||
// Step 2: Enhance the screenshot (if Sharp is available) | ||
const enhancedBase64Image = yield this.enhanceScreenshot(screenshotBase64); | ||
// Step 3: Ensure worker is ready | ||
yield this.readyWorker(driver); | ||
if (!this.worker) { | ||
throw new Error('OCR worker was not initialized'); | ||
} | ||
// Step 4: Process the enhanced screenshot with OCR | ||
this.logger.info('Processing enhanced screenshot with OCR...'); | ||
const result = yield this.worker.recognize(Buffer.from(enhancedBase64Image, 'base64')); | ||
// Step 3: Process the enhanced screenshot with OCR | ||
console.log('Processing enhanced screenshot with OCR...'); | ||
const result = yield tesseract_js_1.default.recognize(Buffer.from(enhancedBase64Image, 'base64'), 'eng', Object.assign(Object.assign({}, TESSERACT_CONFIG), { logger: (m) => { | ||
if (m.status === 'recognizing text') { | ||
console.log(`OCR Progress: ${Math.round(m.progress * 100)}%`); | ||
} | ||
} })); | ||
// Filter words by confidence threshold | ||
const words = result.data.words.filter(word => word.confidence >= MIN_CONFIDENCE_THRESHOLD); | ||
this.logger.info('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence }))); | ||
// Step 5: Find all matches for the given text | ||
console.log('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence }))); | ||
// Step 4: Find all matches for the given text | ||
const matchingWords = words.filter(word => { | ||
@@ -148,3 +129,3 @@ const normalizedWord = word.text.toLowerCase().trim(); | ||
} | ||
// Step 6: Get the desired match based on index | ||
// Step 5: Get the desired match based on index | ||
const targetWord = matchingWords[index]; | ||
@@ -154,7 +135,6 @@ const { x0, y0, x1, y1 } = targetWord.bbox; | ||
const centerY = (y0 + y1) / 2; | ||
this.logger.info(`Text "${targetWord.text}" found at coordinates: (${centerX}, ${centerY}) with confidence: ${targetWord.confidence}%`); | ||
if (!driver.performActions) { | ||
throw new Error('This driver does not support performActions'); | ||
} | ||
// Step 7: Perform the click action | ||
console.log(`Text "${targetWord.text}" found at coordinates: (${centerX}, ${centerY}) with confidence: ${targetWord.confidence}%`); | ||
if (!driver.performActions) | ||
return; | ||
// Step 6: Perform the click action | ||
yield driver.performActions([ | ||
@@ -172,3 +152,3 @@ { | ||
]); | ||
this.logger.info(`Successfully clicked on text "${text}" at index ${index}`); | ||
console.log(`Successfully clicked on text "${text}" at index ${index}`); | ||
return { | ||
@@ -183,3 +163,3 @@ success: true, | ||
catch (err) { | ||
this.logger.error('Error in findAndClickText:', err); | ||
console.error('Error in findAndClickText:', err); | ||
throw err; | ||
@@ -192,22 +172,20 @@ } | ||
try { | ||
if (!driver.getScreenshot) { | ||
throw new Error('This driver does not support screenshot functionality'); | ||
} | ||
if (!driver.getScreenshot) | ||
return; | ||
// Step 1: Capture screenshot | ||
const screenshotBase64 = yield driver.getScreenshot(); | ||
this.logger.info('Enhancing screenshot for OCR text detection...'); | ||
console.log('Enhancing screenshot for OCR text detection...'); | ||
// Step 2: Enhance the screenshot (if Sharp is available) | ||
const enhancedBase64Image = yield this.enhanceScreenshot(screenshotBase64); | ||
// Step 3: Ensure worker is ready | ||
yield this.readyWorker(driver); | ||
if (!this.worker) { | ||
throw new Error('OCR worker was not initialized'); | ||
} | ||
// Step 4: Process the enhanced screenshot with OCR | ||
this.logger.info('Processing enhanced screenshot with OCR...'); | ||
const result = yield this.worker.recognize(Buffer.from(enhancedBase64Image, 'base64')); | ||
// Step 3: Process the enhanced screenshot with OCR | ||
console.log('Processing enhanced screenshot with OCR...'); | ||
const result = yield tesseract_js_1.default.recognize(Buffer.from(enhancedBase64Image, 'base64'), 'eng', Object.assign(Object.assign({}, TESSERACT_CONFIG), { logger: (m) => { | ||
if (m.status === 'recognizing text') { | ||
console.log(`OCR Progress: ${Math.round(m.progress * 100)}%`); | ||
} | ||
} })); | ||
// Filter words by confidence threshold | ||
const words = result.data.words.filter(word => word.confidence >= MIN_CONFIDENCE_THRESHOLD); | ||
this.logger.info('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence }))); | ||
// Step 5: Find all matches for the given text | ||
console.log('OCR result:', words.map(w => ({ text: w.text, confidence: w.confidence }))); | ||
// Step 4: Find all matches for the given text | ||
const matchingWords = words.filter(word => { | ||
@@ -229,3 +207,3 @@ const normalizedWord = word.text.toLowerCase().trim(); | ||
})); | ||
this.logger.info(`Text "${text}" detection result: ${isPresent ? 'FOUND' : 'NOT FOUND'} (${matchingWords.length} matches)`); | ||
console.log(`Text "${text}" detection result: ${isPresent ? 'FOUND' : 'NOT FOUND'} (${matchingWords.length} matches)`); | ||
return { | ||
@@ -244,3 +222,3 @@ success: true, | ||
catch (err) { | ||
this.logger.error('Error in checkTextPresent:', err); | ||
console.error('Error in checkTextPresent:', err); | ||
const errorMessage = err instanceof Error ? err.message : String(err); | ||
@@ -264,3 +242,3 @@ return { | ||
if (!sharp) { | ||
this.logger.info('Sharp not available - using original image without enhancement'); | ||
console.log('Sharp not available - using original image without enhancement'); | ||
return base64Image; | ||
@@ -288,3 +266,3 @@ } | ||
catch (err) { | ||
this.logger.error('Error enhancing screenshot with Sharp, using original:', err); | ||
console.error('Error enhancing screenshot with Sharp, using original:', err); | ||
return base64Image; // Fallback to original image | ||
@@ -294,18 +272,5 @@ } | ||
} | ||
deleteSession(next) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
try { | ||
if (this.worker) { | ||
yield this.worker.terminate(); | ||
this.worker = undefined; | ||
this.isWorkerReady = false; | ||
} | ||
} | ||
finally { | ||
yield next(); | ||
} | ||
}); | ||
} | ||
} | ||
exports.OCRClickPlugin = OCRClickPlugin; | ||
// Define a new method map for Appium commands | ||
OCRClickPlugin.newMethodMap = { | ||
@@ -319,3 +284,2 @@ '/session/:sessionId/appium/plugin/textclick': { | ||
}, | ||
neverProxy: true, | ||
}, | ||
@@ -330,5 +294,4 @@ }, | ||
}, | ||
neverProxy: true, | ||
}, | ||
}, | ||
}; |
{ | ||
"name": "ocr-click-plugin", | ||
"version": "2.1.6", | ||
"version": "2.1.7", | ||
"description": "An Appium plugin that uses OCR (Optical Character Recognition) to find and click text elements on mobile device screens", | ||
@@ -5,0 +5,0 @@ "main": "dist/index.js", |
25802
-4.7%324
-10.25%