@@ -6,2 +6,3 @@ from PIL import Image, ImageDraw
		import numpy as np
		import cv2
		from typing import Tuple, List
		@@ -57,15 +58,13 @@ import xml.etree.ElementTree as ET
		max_wh=max(img.shape[0],img.shape[1])
		paddedimg=np.zeros((max_wh,max_wh,3)).astype(np.uint8)
		paddedimg[:img.shape[0],:img.shape[1],:]=img.copy()
		pil_image = Image.fromarray(paddedimg)
		self.image_width,self.image_height = pil_image.size
		pil_resized = pil_image.resize((self.input_width, self.input_height))
		resized=np.array(pil_resized)
		#resized=resized[:,:,::-1]

		# Scale input pixel value to 0 to 1
		resized = resized / 255.0
		paddedimg=np.zeros((max_wh,max_wh,3),dtype=np.uint8)
		paddedimg[:img.shape[0],:img.shape[1],:]=img
		self.image_width=max_wh
		self.image_height=max_wh
		resized=cv2.resize(paddedimg,(self.input_width, self.input_height),interpolation=cv2.INTER_CUBIC)
		input_image=resized.astype(np.float32)
		input_image/=255.0
		mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
		std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
		input_image= (resized-mean) / std
		input_image-=mean
		input_image/=std
		input_image = input_image.transpose(2,0,1)
		@@ -72,0 +71,0 @@ input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)

+18

-16

ndlocr_lite/ocr.py

		@@ -26,3 +26,3 @@ import sys
		self.pred_str = pred_str
		def __lt__(self, other):
		def __lt__(self, other):
		return self.idx < other.idx
		@@ -117,7 +117,7 @@
		os.makedirs(outputpath,exist_ok=True)
		output_path = os.path.join(outputpath,f"viz_{Path(inputname).name}")
		if output_path.split(".")[-1]=="jp2":
		output_path=output_path.split(".")[:-4]+".jpg"
		print(f"[INFO] Saving result on {output_path}")
		pil_image.save(output_path)
		output_filepath = os.path.join(outputpath,f"viz_{Path(inputname).name}")
		if output_filepath.split(".")[-1]=="jp2":
		output_filepath=output_filepath[:-4]+".jpg"
		print(f"[INFO] Saving result on {output_filepath}")
		pil_image.save(output_filepath)
		return detections,classeslist
		@@ -132,7 +132,7 @@
		os.makedirs(outputpath,exist_ok=True)
		output_path = os.path.join(outputpath,f"viz_{Path(inputname).name}")
		if output_path.split(".")[-1]=="jp2":
		output_path=output_path.split(".")[:-4]+".jpg"
		print(f"[INFO] Saving result on {output_path}")
		pil_image.save(output_path)
		output_filepath = os.path.join(outputpath,f"viz_{Path(inputname).name}")
		if output_filepath.split(".")[-1]=="jp2":
		output_filepath=output_filepath[:-4]+".jpg"
		print(f"[INFO] Saving result on {output_filepath}")
		pil_image.save(output_filepath)
		return detections,classeslist
		@@ -151,3 +151,3 @@
		ext=inputpath.split(".")[-1]
		if ext in ["jpg","png","tiff","jp2","tif","jpeg","bmp"]:
		if ext.lower() in ["jpg","png","tiff","jp2","tif","jpeg","bmp"]:
		inputpathlist.append(inputpath)
		@@ -161,2 +161,3 @@ if len(inputpathlist)==0:

		detector=get_detector(args)
		recognizer100=get_recognizer(args=args)
		@@ -177,3 +178,3 @@ recognizer30=get_recognizer(args=args,weights_path=args.rec_weights30)
		img_h,img_w=img.shape[:2]
		detections,classeslist=inference_on_detector(args=args,inputname=imgname,npimage=img,outputpath=args.output,issaveimg=args.viz)
		detections,classeslist=process_detector(detector,inputname=imgname,npimage=img,outputpath=args.output,issaveimg=args.viz)
		e1=time.time()
		@@ -262,5 +263,6 @@ resultobj=[dict(),dict()]
		alltextlist=alltextlist[::-1]
		with open(os.path.join(args.output,os.path.basename(inputpath).split(".")[0]+".xml"),"w",encoding="utf-8") as wf:
		output_stem = os.path.splitext(os.path.basename(inputpath))[0]
		with open(os.path.join(args.output,output_stem+".xml"),"w",encoding="utf-8") as wf:
		wf.write(allxmlstr)
		with open(os.path.join(args.output,os.path.basename(inputpath).split(".")[0]+".json"),"w",encoding="utf-8") as wf:
		with open(os.path.join(args.output,output_stem+".json"),"w",encoding="utf-8") as wf:
		alljsonobj={
		@@ -277,3 +279,3 @@ "contents":[resjsonarray],
		wf.write(alljsonstr)
		with open(os.path.join(args.output,os.path.basename(inputpath).split(".")[0]+".txt"),"w",encoding="utf-8") as wtf:
		with open(os.path.join(args.output,output_stem+".txt"),"w",encoding="utf-8") as wtf:
		wtf.write("\n".join(alltextlist))
		@@ -280,0 +282,0 @@ print("Total calculation time (Detection + Recognition):",time.time()-start)

+12

-14

ndlocr_lite/parseq.py

		@@ -6,2 +6,3 @@ from PIL import Image
		import numpy as np
		import cv2
		from typing import Tuple, List
		@@ -51,21 +52,18 @@
		def preprocess(self, img: np.ndarray) -> np.ndarray:
		pil_image = Image.fromarray(img)
		if pil_image.height>pil_image.width:
		pil_image =pil_image.transpose(Image.ROTATE_90)
		pil_resized = pil_image.resize((self.input_width, self.input_height))

		resized = np.array(pil_resized, dtype=np.float32)
		resized = resized[:,:,::-1]
		input_image = resized / 255.0
		input_image = 2.0*(input_image-0.5)
		h,w=img.shape[:2]
		if h>w:
		img=cv2.rotate(img,cv2.ROTATE_90_COUNTERCLOCKWISE)
		resized=cv2.resize(img,(self.input_width, self.input_height),interpolation=cv2.INTER_LINEAR)
		input_image=np.ascontiguousarray(resized[:,:,::-1]).astype(np.float32)
		input_image/=127.5
		input_image-=1.0
		input_image = input_image.transpose(2,0,1)
		input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
		return input_tensor
		return input_image[np.newaxis, :, :, :]

		def read(self, img: np.ndarray) -> List:
		if img is None:
		return None
		if img is None or img.size == 0:
		return ""
		input_tensor = self.preprocess(img)
		outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})[0]
		indices = np.argmax(outputs, axis=2)[0]
		indices = np.argmax(outputs[0], axis=1)
		stop_idx = np.where(indices == 0)[0]
		@@ -72,0 +70,0 @@ end_pos = stop_idx[0] if stop_idx.size > 0 else len(indices)

+7

-5

owocr.egg-info/PKG-INFO

		Metadata-Version: 2.4
		Name: owocr
		Version: 1.26.0
		Summary: Optical character recognition for Japanese text
		Version: 1.26.1
		Summary: Multi-service, multi-platform optical character recognition
		Author-email: AuroraWright <fallingluma@gmail.com>
		@@ -19,2 +19,3 @@ License-Expression: GPL-3.0-only
		Classifier: Programming Language :: Python :: 3.13
		Classifier: Programming Language :: Python :: 3.14
		Classifier: Programming Language :: Python :: 3
		@@ -77,3 +78,3 @@ Classifier: Programming Language :: Python
		Provides-Extra: azure
		Requires-Dist: azure-ai-vision-imageanalysis; extra == "azure"
		Requires-Dist: azure-ai-documentintelligence; extra == "azure"
		Provides-Extra: ndlocrlite
		@@ -85,2 +86,3 @@ Requires-Dist: lxml; extra == "ndlocrlite"
		Requires-Dist: tqdm; extra == "ndlocrlite"
		Requires-Dist: opencv-python-headless; extra == "ndlocrlite"
		Dynamic: license-file
		@@ -117,3 +119,3 @@

		OwOCR has been tested on Python 3.11, 3.12 and 3.13. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS.
		OwOCR has been tested on Python 3.11 to 3.14. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS.

		@@ -190,3 +192,3 @@ ## Usage (terminal)
		- Google Vision - You need a service account .json file named google_vision.json in `user directory/.config/` → Terminal: install with `pip install "owocr[gvision]"`, key: `g`
		- Azure Image Analysis - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v`
		- Azure Document Intelligence - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v`
		- OCRSpace - You need to specify an api key in the config file. → Terminal key: `o`
		@@ -193,0 +195,0 @@

+2

-1

owocr.egg-info/requires.txt

		@@ -27,3 +27,3 @@ jaconv
		[azure]
		azure-ai-vision-imageanalysis
		azure-ai-documentintelligence

		@@ -61,2 +61,3 @@ [easyocr]
		tqdm
		opencv-python-headless

		@@ -63,0 +64,0 @@ [oneocr]

+1

-1

owocr/__init__.py

		@@ -1,2 +0,2 @@
		__version__ = (1, 26, 0)
		__version__ = (1, 26, 1)
		__version_string__ = '.'.join(map(str, __version__))

+15

-22

owocr/config_editor.py

		@@ -210,3 +210,3 @@ import configparser

		width, height = 700, 700
		width, height = 750, 750
		window_scale = 1.0
		@@ -249,2 +249,3 @@ if sys.platform == 'win32':
		('delete_images', 'bool', 'Delete images from the folder after processing'),
		('skip_existing_images', 'bool', 'Ignore images already in the folder when owocr starts'),
		('pause_at_startup', 'bool', 'Pause when owocr starts'),
		@@ -747,19 +748,9 @@ ('notifications', 'bool', 'Show OS notifications with the detected text'),
		def _open_picker(self, option, var):
		if option in ['read_from', 'read_from_secondary']:
		folder_path = filedialog.askdirectory(
		title=f'Select directory for {option}',
		mustexist=True
		)
		if folder_path:
		var.set(folder_path)
		self._update_general_state()
		elif option == 'write_to':
		file_path = filedialog.asksaveasfilename(
		title='Select output text file',
		defaultextension='.txt',
		filetypes=[('Text files', '.txt'), ('All files', '.*')]
		)
		if file_path:
		var.set(file_path)
		self._update_general_state()
		folder_path = filedialog.askdirectory(
		title=f'Select directory for {option}',
		mustexist=True
		)
		if folder_path:
		var.set(folder_path)
		self._update_general_state()

		@@ -802,6 +793,8 @@ def _update_ui_state(self):

		widget_info = self.widgets['delete_images']
		frame = widget_info.get('frame')
		show_delete = self._is_folder_file_selected('read_from') or self._is_folder_file_selected('read_from_secondary')
		frame.grid() if show_delete else frame.grid_remove()
		show_folder_options = self._is_folder_file_selected('read_from') or self._is_folder_file_selected('read_from_secondary')
		folder_options = ['delete_images', 'skip_existing_images']
		for option in folder_options:
		widget_info = self.widgets[option]
		frame = widget_info.get('frame')
		frame.grid() if show_folder_options else frame.grid_remove()

		@@ -808,0 +801,0 @@ dropdown_options = ['read_from', 'read_from_secondary', 'write_to']

+5

-2

owocr/config.py

		@@ -18,3 +18,3 @@ import os
		It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
		Recognized text can be either saved to system clipboard, appended to a text file or sent via a websocket.
		Recognized text can be either saved to system clipboard, written to a text/json file or sent via a websocket.
		'''))
		@@ -27,3 +27,3 @@
		parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
		help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
		help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a directory to write text/json files to.')
		parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
		@@ -37,2 +37,4 @@ help='OCR engine to use. Available: "mangaocr", "mangaocrs", "glens", "bing", "gvision", "screenai", "avision", "alivetext", "azure", "winrtocr", "oneocr", "ndlocrlite", "easyocr", "rapidocr", "ocrspace".')
		help='Delete image files after processing when reading from a directory.')
		parser.add_argument('-se', '--skip_existing_images', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
		help='Skip existing images at startup when reading from a directory.')
		parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
		@@ -115,2 +117,3 @@ help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture and periodic screenshots.')
		'delete_images': False,
		'skip_existing_images': False,
		'engines': [],
		@@ -117,0 +120,0 @@ 'delay_seconds': 0.5,

+7

-5

PKG-INFO

		Metadata-Version: 2.4
		Name: owocr
		Version: 1.26.0
		Summary: Optical character recognition for Japanese text
		Version: 1.26.1
		Summary: Multi-service, multi-platform optical character recognition
		Author-email: AuroraWright <fallingluma@gmail.com>
		@@ -19,2 +19,3 @@ License-Expression: GPL-3.0-only
		Classifier: Programming Language :: Python :: 3.13
		Classifier: Programming Language :: Python :: 3.14
		Classifier: Programming Language :: Python :: 3
		@@ -77,3 +78,3 @@ Classifier: Programming Language :: Python
		Provides-Extra: azure
		Requires-Dist: azure-ai-vision-imageanalysis; extra == "azure"
		Requires-Dist: azure-ai-documentintelligence; extra == "azure"
		Provides-Extra: ndlocrlite
		@@ -85,2 +86,3 @@ Requires-Dist: lxml; extra == "ndlocrlite"
		Requires-Dist: tqdm; extra == "ndlocrlite"
		Requires-Dist: opencv-python-headless; extra == "ndlocrlite"
		Dynamic: license-file
		@@ -117,3 +119,3 @@

		OwOCR has been tested on Python 3.11, 3.12 and 3.13. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS.
		OwOCR has been tested on Python 3.11 to 3.14. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS.

		@@ -190,3 +192,3 @@ ## Usage (terminal)
		- Google Vision - You need a service account .json file named google_vision.json in `user directory/.config/` → Terminal: install with `pip install "owocr[gvision]"`, key: `g`
		- Azure Image Analysis - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v`
		- Azure Document Intelligence - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v`
		- OCRSpace - You need to specify an api key in the config file. → Terminal key: `o`
		@@ -193,0 +195,0 @@

+6

-4

pyproject.toml

		@@ -8,5 +8,5 @@ [build-system]
		dynamic = ["version"]
		description = "Optical character recognition for Japanese text"
		description = "Multi-service, multi-platform optical character recognition"
		readme = "README.md"
		requires-python = ">=3.11" # onnxruntime is not yet available for Python 3.14
		requires-python = ">=3.11"
		authors = [
		@@ -32,2 +32,3 @@ {name = "AuroraWright", email = "fallingluma@gmail.com"}
		"Programming Language :: Python :: 3.13",
		"Programming Language :: Python :: 3.14",
		"Programming Language :: Python :: 3",
		@@ -113,3 +114,3 @@ "Programming Language :: Python",
		azure = [
		"azure-ai-vision-imageanalysis"
		"azure-ai-documentintelligence"
		]
		@@ -122,3 +123,4 @@
		"networkx",
		"tqdm"
		"tqdm",
		"opencv-python-headless"
		]
		@@ -125,0 +127,0 @@

+2

-2

README.md

		@@ -30,3 +30,3 @@ <div align="center">

		OwOCR has been tested on Python 3.11, 3.12 and 3.13. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS.
		OwOCR has been tested on Python 3.11 to 3.14. It can be installed with `pip install owocr` after you install Python. You also need to have one or more OCR engines, check the list below for instructions. I recommend installing at least Google Lens on any operating system, and OneOCR if you are on Windows. Bing is pre-installed, Apple Vision and Live Text come pre-installed on macOS.

		@@ -103,3 +103,3 @@ ## Usage (terminal)
		- Google Vision - You need a service account .json file named google_vision.json in `user directory/.config/` → Terminal: install with `pip install "owocr[gvision]"`, key: `g`
		- Azure Image Analysis - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v`
		- Azure Document Intelligence - You need to specify an api key and an endpoint in the config file → Terminal: install with `pip install "owocr[azure]"`, key: `v`
		- OCRSpace - You need to specify an api key in the config file. → Terminal key: `o`
		@@ -106,0 +106,0 @@

owocr/ocr.py

Sorry, the diff of this file is too big to display

owocr/run.py

Sorry, the diff of this file is too big to display

owocr - pypi Package Compare versions

Improved metrics