owocr
Advanced tools
+10
-11
@@ -6,2 +6,3 @@ from PIL import Image, ImageDraw | ||
| import numpy as np | ||
| import cv2 | ||
| from typing import Tuple, List | ||
@@ -57,15 +58,13 @@ import xml.etree.ElementTree as ET | ||
| max_wh=max(img.shape[0],img.shape[1]) | ||
| paddedimg=np.zeros((max_wh,max_wh,3)).astype(np.uint8) | ||
| paddedimg[:img.shape[0],:img.shape[1],:]=img.copy() | ||
| pil_image = Image.fromarray(paddedimg) | ||
| self.image_width,self.image_height = pil_image.size | ||
| pil_resized = pil_image.resize((self.input_width, self.input_height)) | ||
| resized=np.array(pil_resized) | ||
| #resized=resized[:,:,::-1] | ||
| # Scale input pixel value to 0 to 1 | ||
| resized = resized / 255.0 | ||
| paddedimg=np.zeros((max_wh,max_wh,3),dtype=np.uint8) | ||
| paddedimg[:img.shape[0],:img.shape[1],:]=img | ||
| self.image_width=max_wh | ||
| self.image_height=max_wh | ||
| resized=cv2.resize(paddedimg,(self.input_width, self.input_height),interpolation=cv2.INTER_CUBIC) | ||
| input_image=resized.astype(np.float32) | ||
| input_image/=255.0 | ||
| mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) | ||
| std = np.array([0.229, 0.224, 0.225], dtype=np.float32) | ||
| input_image= (resized-mean) / std | ||
| input_image-=mean | ||
| input_image/=std | ||
| input_image = input_image.transpose(2,0,1) | ||
@@ -72,0 +71,0 @@ input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32) |
+18
-16
@@ -26,3 +26,3 @@ import sys | ||
| self.pred_str = pred_str | ||
| def __lt__(self, other): | ||
| def __lt__(self, other): | ||
| return self.idx < other.idx | ||
@@ -117,7 +117,7 @@ | ||
| os.makedirs(outputpath,exist_ok=True) | ||
| output_path = os.path.join(outputpath,f"viz_{Path(inputname).name}") | ||
| if output_path.split(".")[-1]=="jp2": | ||
| output_path=output_path.split(".")[:-4]+".jpg" | ||
| print(f"[INFO] Saving result on {output_path}") | ||
| pil_image.save(output_path) | ||
| output_filepath = os.path.join(outputpath,f"viz_{Path(inputname).name}") | ||
| if output_filepath.split(".")[-1]=="jp2": | ||
| output_filepath=output_filepath[:-4]+".jpg" | ||
| print(f"[INFO] Saving result on {output_filepath}") | ||
| pil_image.save(output_filepath) | ||
| return detections,classeslist | ||
@@ -132,7 +132,7 @@ | ||
| os.makedirs(outputpath,exist_ok=True) | ||
| output_path = os.path.join(outputpath,f"viz_{Path(inputname).name}") | ||
| if output_path.split(".")[-1]=="jp2": | ||
| output_path=output_path.split(".")[:-4]+".jpg" | ||
| print(f"[INFO] Saving result on {output_path}") | ||
| pil_image.save(output_path) | ||
| output_filepath = os.path.join(outputpath,f"viz_{Path(inputname).name}") | ||
| if output_filepath.split(".")[-1]=="jp2": | ||
| output_filepath=output_filepath[:-4]+".jpg" | ||
| print(f"[INFO] Saving result on {output_filepath}") | ||
| pil_image.save(output_filepath) | ||
| return detections,classeslist | ||
@@ -151,3 +151,3 @@ | ||
| ext=inputpath.split(".")[-1] | ||
| if ext in ["jpg","png","tiff","jp2","tif","jpeg","bmp"]: | ||
| if ext.lower() in ["jpg","png","tiff","jp2","tif","jpeg","bmp"]: | ||
| inputpathlist.append(inputpath) | ||
@@ -161,2 +161,3 @@ if len(inputpathlist)==0: | ||
| detector=get_detector(args) | ||
| recognizer100=get_recognizer(args=args) | ||
@@ -177,3 +178,3 @@ recognizer30=get_recognizer(args=args,weights_path=args.rec_weights30) | ||
| img_h,img_w=img.shape[:2] | ||
| detections,classeslist=inference_on_detector(args=args,inputname=imgname,npimage=img,outputpath=args.output,issaveimg=args.viz) | ||
| detections,classeslist=process_detector(detector,inputname=imgname,npimage=img,outputpath=args.output,issaveimg=args.viz) | ||
| e1=time.time() | ||
@@ -262,5 +263,6 @@ resultobj=[dict(),dict()] | ||
| alltextlist=alltextlist[::-1] | ||
| with open(os.path.join(args.output,os.path.basename(inputpath).split(".")[0]+".xml"),"w",encoding="utf-8") as wf: | ||
| output_stem = os.path.splitext(os.path.basename(inputpath))[0] | ||
| with open(os.path.join(args.output,output_stem+".xml"),"w",encoding="utf-8") as wf: | ||
| wf.write(allxmlstr) | ||
| with open(os.path.join(args.output,os.path.basename(inputpath).split(".")[0]+".json"),"w",encoding="utf-8") as wf: | ||
| with open(os.path.join(args.output,output_stem+".json"),"w",encoding="utf-8") as wf: | ||
| alljsonobj={ | ||
@@ -277,3 +279,3 @@ "contents":[resjsonarray], | ||
| wf.write(alljsonstr) | ||
| with open(os.path.join(args.output,os.path.basename(inputpath).split(".")[0]+".txt"),"w",encoding="utf-8") as wtf: | ||
| with open(os.path.join(args.output,output_stem+".txt"),"w",encoding="utf-8") as wtf: | ||
| wtf.write("\n".join(alltextlist)) | ||
@@ -280,0 +282,0 @@ print("Total calculation time (Detection + Recognition):",time.time()-start) |
+12
-14
@@ -6,2 +6,3 @@ from PIL import Image | ||
| import numpy as np | ||
| import cv2 | ||
| from typing import Tuple, List | ||
@@ -51,21 +52,18 @@ | ||
| def preprocess(self, img: np.ndarray) -> np.ndarray: | ||
| pil_image = Image.fromarray(img) | ||
| if pil_image.height>pil_image.width: | ||
| pil_image =pil_image.transpose(Image.ROTATE_90) | ||
| pil_resized = pil_image.resize((self.input_width, self.input_height)) | ||
| resized = np.array(pil_resized, dtype=np.float32) | ||
| resized = resized[:,:,::-1] | ||
| input_image = resized / 255.0 | ||
| input_image = 2.0*(input_image-0.5) | ||
| h,w=img.shape[:2] | ||
| if h>w: | ||
| img=cv2.rotate(img,cv2.ROTATE_90_COUNTERCLOCKWISE) | ||
| resized=cv2.resize(img,(self.input_width, self.input_height),interpolation=cv2.INTER_LINEAR) | ||
| input_image=np.ascontiguousarray(resized[:,:,::-1]).astype(np.float32) | ||
| input_image/=127.5 | ||
| input_image-=1.0 | ||
| input_image = input_image.transpose(2,0,1) | ||
| input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32) | ||
| return input_tensor | ||
| return input_image[np.newaxis, :, :, :] | ||
| def read(self, img: np.ndarray) -> List: | ||
| if img is None: | ||
| return None | ||
| if img is None or img.size == 0: | ||
| return "" | ||
| input_tensor = self.preprocess(img) | ||
| outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})[0] | ||
| indices = np.argmax(outputs, axis=2)[0] | ||
| indices = np.argmax(outputs[0], axis=1) | ||
| stop_idx = np.where(indices == 0)[0] | ||
@@ -72,0 +70,0 @@ end_pos = stop_idx[0] if stop_idx.size > 0 else len(indices) |
| Metadata-Version: 2.4 | ||
| Name: owocr | ||
| Version: 1.26.0 | ||
| Summary: Optical character recognition for Japanese text | ||
| Version: 1.26.1 | ||
| Summary: Multi-service, multi-platform optical character recognition | ||
| Author-email: AuroraWright <fallingluma@gmail.com> | ||
@@ -19,2 +19,3 @@ License-Expression: GPL-3.0-only | ||
| Classifier: Programming Language :: Python :: 3.13 | ||
| Classifier: Programming Language :: Python :: 3.14 | ||
| Classifier: Programming Language :: Python :: 3 | ||
@@ -77,3 +78,3 @@ Classifier: Programming Language :: Python | ||
| Provides-Extra: azure | ||
| Requires-Dist: azure-ai-vision-imageanalysis; extra == "azure" | ||
| Requires-Dist: azure-ai-documentintelligence; extra == "azure" | ||
| Provides-Extra: ndlocrlite | ||
@@ -85,2 +86,3 @@ Requires-Dist: lxml; extra == "ndlocrlite" | ||
| Requires-Dist: tqdm; extra == "ndlocrlite" | ||
| Requires-Dist: opencv-python-headless; extra == "ndlocrlite" | ||
| Dynamic: license-file | ||
@@ -117,3 +119,3 @@ | ||
| OwOCR has been tested on Python 3.11, 3.12 and 3.13. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS. | ||
| OwOCR has been tested on Python 3.11 to 3.14. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS. | ||
@@ -190,3 +192,3 @@ ## Usage (terminal) | ||
| - Google Vision - You need a service account .json file named google_vision.json in `user directory/.config/` → Terminal: install with `pip install "owocr[gvision]"`, key: `g` | ||
| - Azure Image Analysis - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v` | ||
| - Azure Document Intelligence - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v` | ||
| - OCRSpace - You need to specify an api key in the config file. → Terminal key: `o` | ||
@@ -193,0 +195,0 @@ |
@@ -27,3 +27,3 @@ jaconv | ||
| [azure] | ||
| azure-ai-vision-imageanalysis | ||
| azure-ai-documentintelligence | ||
@@ -61,2 +61,3 @@ [easyocr] | ||
| tqdm | ||
| opencv-python-headless | ||
@@ -63,0 +64,0 @@ [oneocr] |
@@ -1,2 +0,2 @@ | ||
| __version__ = (1, 26, 0) | ||
| __version__ = (1, 26, 1) | ||
| __version_string__ = '.'.join(map(str, __version__)) |
+15
-22
@@ -210,3 +210,3 @@ import configparser | ||
| width, height = 700, 700 | ||
| width, height = 750, 750 | ||
| window_scale = 1.0 | ||
@@ -249,2 +249,3 @@ if sys.platform == 'win32': | ||
| ('delete_images', 'bool', 'Delete images from the folder after processing'), | ||
| ('skip_existing_images', 'bool', 'Ignore images already in the folder when owocr starts'), | ||
| ('pause_at_startup', 'bool', 'Pause when owocr starts'), | ||
@@ -747,19 +748,9 @@ ('notifications', 'bool', 'Show OS notifications with the detected text'), | ||
| def _open_picker(self, option, var): | ||
| if option in ['read_from', 'read_from_secondary']: | ||
| folder_path = filedialog.askdirectory( | ||
| title=f'Select directory for {option}', | ||
| mustexist=True | ||
| ) | ||
| if folder_path: | ||
| var.set(folder_path) | ||
| self._update_general_state() | ||
| elif option == 'write_to': | ||
| file_path = filedialog.asksaveasfilename( | ||
| title='Select output text file', | ||
| defaultextension='.txt', | ||
| filetypes=[('Text files', '*.txt'), ('All files', '*.*')] | ||
| ) | ||
| if file_path: | ||
| var.set(file_path) | ||
| self._update_general_state() | ||
| folder_path = filedialog.askdirectory( | ||
| title=f'Select directory for {option}', | ||
| mustexist=True | ||
| ) | ||
| if folder_path: | ||
| var.set(folder_path) | ||
| self._update_general_state() | ||
@@ -802,6 +793,8 @@ def _update_ui_state(self): | ||
| widget_info = self.widgets['delete_images'] | ||
| frame = widget_info.get('frame') | ||
| show_delete = self._is_folder_file_selected('read_from') or self._is_folder_file_selected('read_from_secondary') | ||
| frame.grid() if show_delete else frame.grid_remove() | ||
| show_folder_options = self._is_folder_file_selected('read_from') or self._is_folder_file_selected('read_from_secondary') | ||
| folder_options = ['delete_images', 'skip_existing_images'] | ||
| for option in folder_options: | ||
| widget_info = self.widgets[option] | ||
| frame = widget_info.get('frame') | ||
| frame.grid() if show_folder_options else frame.grid_remove() | ||
@@ -808,0 +801,0 @@ dropdown_options = ['read_from', 'read_from_secondary', 'write_to'] |
+5
-2
@@ -18,3 +18,3 @@ import os | ||
| It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window. | ||
| Recognized text can be either saved to system clipboard, appended to a text file or sent via a websocket. | ||
| Recognized text can be either saved to system clipboard, written to a text/json file or sent via a websocket. | ||
| ''')) | ||
@@ -27,3 +27,3 @@ | ||
| parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS, | ||
| help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.') | ||
| help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a directory to write text/json files to.') | ||
| parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS, | ||
@@ -37,2 +37,4 @@ help='OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing", "gvision", "screenai", "avision", "alivetext", "azure", "winrtocr", "oneocr", "ndlocrlite", "easyocr", "rapidocr", "ocrspace".') | ||
| help='Delete image files after processing when reading from a directory.') | ||
| parser.add_argument('-se', '--skip_existing_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, | ||
| help='Skip existing images at startup when reading from a directory.') | ||
| parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, | ||
@@ -115,2 +117,3 @@ help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture and periodic screenshots.') | ||
| 'delete_images': False, | ||
| 'skip_existing_images': False, | ||
| 'engines': [], | ||
@@ -117,0 +120,0 @@ 'delay_seconds': 0.5, |
+7
-5
| Metadata-Version: 2.4 | ||
| Name: owocr | ||
| Version: 1.26.0 | ||
| Summary: Optical character recognition for Japanese text | ||
| Version: 1.26.1 | ||
| Summary: Multi-service, multi-platform optical character recognition | ||
| Author-email: AuroraWright <fallingluma@gmail.com> | ||
@@ -19,2 +19,3 @@ License-Expression: GPL-3.0-only | ||
| Classifier: Programming Language :: Python :: 3.13 | ||
| Classifier: Programming Language :: Python :: 3.14 | ||
| Classifier: Programming Language :: Python :: 3 | ||
@@ -77,3 +78,3 @@ Classifier: Programming Language :: Python | ||
| Provides-Extra: azure | ||
| Requires-Dist: azure-ai-vision-imageanalysis; extra == "azure" | ||
| Requires-Dist: azure-ai-documentintelligence; extra == "azure" | ||
| Provides-Extra: ndlocrlite | ||
@@ -85,2 +86,3 @@ Requires-Dist: lxml; extra == "ndlocrlite" | ||
| Requires-Dist: tqdm; extra == "ndlocrlite" | ||
| Requires-Dist: opencv-python-headless; extra == "ndlocrlite" | ||
| Dynamic: license-file | ||
@@ -117,3 +119,3 @@ | ||
| OwOCR has been tested on Python 3.11, 3.12 and 3.13. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS. | ||
| OwOCR has been tested on Python 3.11 to 3.14. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS. | ||
@@ -190,3 +192,3 @@ ## Usage (terminal) | ||
| - Google Vision - You need a service account .json file named google_vision.json in `user directory/.config/` → Terminal: install with `pip install "owocr[gvision]"`, key: `g` | ||
| - Azure Image Analysis - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v` | ||
| - Azure Document Intelligence - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v` | ||
| - OCRSpace - You need to specify an api key in the config file. → Terminal key: `o` | ||
@@ -193,0 +195,0 @@ |
+6
-4
@@ -8,5 +8,5 @@ [build-system] | ||
| dynamic = ["version"] | ||
| description = "Optical character recognition for Japanese text" | ||
| description = "Multi-service, multi-platform optical character recognition" | ||
| readme = "README.md" | ||
| requires-python = ">=3.11" # onnxruntime is not yet available for Python 3.14 | ||
| requires-python = ">=3.11" | ||
| authors = [ | ||
@@ -32,2 +32,3 @@ {name = "AuroraWright", email = "fallingluma@gmail.com"} | ||
| "Programming Language :: Python :: 3.13", | ||
| "Programming Language :: Python :: 3.14", | ||
| "Programming Language :: Python :: 3", | ||
@@ -113,3 +114,3 @@ "Programming Language :: Python", | ||
| azure = [ | ||
| "azure-ai-vision-imageanalysis" | ||
| "azure-ai-documentintelligence" | ||
| ] | ||
@@ -122,3 +123,4 @@ | ||
| "networkx", | ||
| "tqdm" | ||
| "tqdm", | ||
| "opencv-python-headless" | ||
| ] | ||
@@ -125,0 +127,0 @@ |
+2
-2
@@ -30,3 +30,3 @@ <div align="center"> | ||
| OwOCR has been tested on Python 3.11, 3.12 and 3.13. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS. | ||
| OwOCR has been tested on Python 3.11 to 3.14. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS. | ||
@@ -103,3 +103,3 @@ ## Usage (terminal) | ||
| - Google Vision - You need a service account .json file named google_vision.json in `user directory/.config/` → Terminal: install with `pip install "owocr[gvision]"`, key: `g` | ||
| - Azure Image Analysis - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v` | ||
| - Azure Document Intelligence - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v` | ||
| - OCRSpace - You need to specify an api key in the config file. → Terminal key: `o` | ||
@@ -106,0 +106,0 @@ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1187044
0.25%17899
0.28%