Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign inDemoInstall
Socket

tesseract.js

Package Overview
Dependencies
Maintainers
3
Versions
68
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tesseract.js - npm Package Compare versions

Comparing version 1.0.10 to 1.0.11

146

dist/tesseract.js
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
'use strict';
/* eslint-disable no-unused-vars */
// Keep direct references to the Object.prototype methods; the fallback
// assign below invokes them with .call() on each source object.
var hasOwnProperty = Object.prototype.hasOwnProperty;
var propIsEnumerable = Object.prototype.propertyIsEnumerable;
/**
 * Converts a value to an object, refusing null and undefined.
 *
 * @param {*} val - The value to convert.
 * @returns {Object} The result of `Object(val)`; objects are returned as-is,
 *   primitives come back wrapped.
 * @throws {TypeError} If `val` is null or undefined.
 */
function toObject(val) {
	var isMissing = val === undefined || val === null;
	if (!isMissing) {
		return Object(val);
	}
	throw new TypeError('Object.assign cannot be called with null or undefined');
}
/**
 * Reports whether the runtime's own Object.assign can be used.
 *
 * Runs three property-order probes and returns false as soon as one of them
 * fails, when Object.assign is missing, or when anything throws.
 *
 * @returns {boolean} true only if Object.assign exists and every probe passes.
 */
function shouldUseNative() {
	var alphabet = 'abcdefghijklmnopqrst';

	try {
		if (!Object.assign) {
			return false;
		}

		// Detect buggy property enumeration order in older V8 versions.

		// https://bugs.chromium.org/p/v8/issues/detail?id=4118
		// A String wrapper is needed here on purpose: the probe checks where
		// an extra index property lands among the wrapper's own names.
		var boxed = new String('abc'); // eslint-disable-line
		boxed[5] = 'de';
		var firstOwnName = Object.getOwnPropertyNames(boxed)[0];
		if (firstOwnName === '5') {
			return false;
		}

		// https://bugs.chromium.org/p/v8/issues/detail?id=3056
		// Keys are '_' plus a control character; values must come back 0..9.
		var numbered = {};
		var code = 0;
		while (code < 10) {
			numbered['_' + String.fromCharCode(code)] = code;
			code++;
		}
		var digits = '';
		Object.getOwnPropertyNames(numbered).forEach(function (name) {
			digits += numbered[name];
		});
		if (digits !== '0123456789') {
			return false;
		}

		// https://bugs.chromium.org/p/v8/issues/detail?id=3056
		// Copy twenty single-letter keys and verify their order is preserved.
		var lettered = {};
		for (var pos = 0; pos < alphabet.length; pos++) {
			var letter = alphabet.charAt(pos);
			lettered[letter] = letter;
		}
		var copiedKeys = Object.keys(Object.assign({}, lettered));
		return copiedKeys.join('') === alphabet;
	} catch (e) {
		// We don't expect any of the above to throw, but better to be safe.
		return false;
	}
}
module.exports = shouldUseNative() ? Object.assign : function (target, source) {
var from;
var to = toObject(target);
var symbols;
for (var s = 1; s < arguments.length; s++) {
from = Object(arguments[s]);
for (var key in from) {
if (hasOwnProperty.call(from, key)) {
to[key] = from[key];
}
}
if (Object.getOwnPropertySymbols) {
symbols = Object.getOwnPropertySymbols(from);
for (var i = 0; i < symbols.length; i++) {
if (propIsEnumerable.call(from, symbols[i])) {
to[symbols[i]] = from[symbols[i]];
}
}
}
}
return to;
};
},{}],2:[function(require,module,exports){
// shim for using process in browser

@@ -257,3 +172,7 @@ var process = module.exports = {};

process.emit = noop;
process.prependListener = noop;
process.prependOnceListener = noop;
process.listeners = function (name) { return [] }
process.binding = function (name) {

@@ -269,6 +188,6 @@ throw new Error('process.binding is not supported');

},{}],3:[function(require,module,exports){
},{}],2:[function(require,module,exports){
module.exports={
"name": "tesseract.js",
"version": "1.0.10",
"version": "1.0.11",
"description": "Pure Javascript Multilingual OCR",

@@ -298,2 +217,3 @@ "main": "src/index.js",

"file-type": "^3.8.0",
"isomorphic-fetch": "^2.2.1",
"is-url": "^1.2.2",

@@ -317,3 +237,3 @@ "jpeg-js": "^0.2.0",

},{}],4:[function(require,module,exports){
},{}],3:[function(require,module,exports){
(function (process){

@@ -323,5 +243,5 @@ 'use strict';

var defaultOptions = {
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'
// workerPath: 'https://rawcdn.githack.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/'
};

@@ -334,3 +254,3 @@

var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js';
defaultOptions.workerPath = 'https://rawcdn.githack.com/naptha/tesseract.js/' + version + '/dist/worker.js';
}

@@ -427,3 +347,3 @@

}).call(this,require('_process'))
},{"../../package.json":3,"_process":2}],5:[function(require,module,exports){
},{"../../package.json":2,"_process":1}],4:[function(require,module,exports){
"use strict";

@@ -495,3 +415,3 @@

},{}],6:[function(require,module,exports){
},{}],5:[function(require,module,exports){
'use strict';

@@ -607,3 +527,3 @@

},{"../node/index.js":4}],7:[function(require,module,exports){
},{"../node/index.js":3}],6:[function(require,module,exports){
'use strict';

@@ -618,8 +538,8 @@

var TesseractJob = require('./common/job');
var objectAssign = require('object-assign');
var version = require('../package.json').version;
function create(workerOptions) {
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions));
function create() {
var workerOptions = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
var worker = new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions));
worker.create = create;

@@ -642,12 +562,10 @@ worker.version = version;

key: 'recognize',
value: function recognize(image, options) {
value: function recognize(image) {
var _this = this;
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
return this._delay(function (job) {
if (typeof options === 'string') {
options = { lang: options };
} else {
options = options || {};
options.lang = options.lang || 'eng';
}
if (typeof options === 'string') options = { lang: options };
options.lang = options.lang || 'eng';

@@ -659,6 +577,7 @@ job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions });

key: 'detect',
value: function detect(image, options) {
value: function detect(image) {
var _this2 = this;
options = options || {};
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
return this._delay(function (job) {

@@ -673,2 +592,4 @@ job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });

this.worker = null;
this._currentJob = null;
this._queue = [];
}

@@ -695,3 +616,3 @@ }, {

this._currentJob = null;
if (this._queue.length > 0) {
if (this._queue.length) {
this._queue[0]();

@@ -703,3 +624,2 @@ }

value: function _recv(packet) {
if (packet.status === 'resolve' && packet.action === 'recognize') {

@@ -720,7 +640,5 @@ packet.data = circularize(packet.data);

var DefaultTesseract = create();
module.exports = create();
module.exports = DefaultTesseract;
},{"../package.json":3,"./common/circularize.js":5,"./common/job":6,"./node/index.js":4,"object-assign":1}]},{},[7])(7)
},{"../package.json":2,"./common/circularize.js":4,"./common/job":5,"./node/index.js":3}]},{},[6])(6)
});

@@ -28,2 +28,3 @@ # Tesseract Languages

| 'eus' | Basque |
| 'fas' | Persian (Farsi) |
| 'fin' | Finnish |

@@ -30,0 +31,0 @@ | 'fra' | French |

@@ -1,14 +0,15 @@

var path = require('path');
var Tesseract = require('../../') // replace this with require('tesseract.js')
var image = path.resolve(__dirname, 'cosmic.png');
// replace this with require('tesseract.js')
var Tesseract = require('../../'),
image = require('path').resolve(__dirname, 'cosmic.png');
Tesseract.recognize(image)
.then(data => {
console.log('then\n', data.text)
})
.catch(err => {
console.log('catch\n', err);
})
.finally(e => {
console.log('finally\n');
});
.then(data => {
console.log('then\n', data.text)
})
.catch(err => {
console.log('catch\n', err);
})
.finally(e => {
console.log('finally\n');
process.exit();
});

@@ -1,11 +0,12 @@

var path = require('path');
var Tesseract = require('../../') // replace this with require('tesseract.js')
var image = path.resolve(__dirname, 'cosmic.png');
// replace this with require('tesseract.js')
var Tesseract = require('../../'),
image = require('path').resolve(__dirname, 'cosmic.png');
Tesseract.detect(image)
.progress(function(info){
console.log(info)
})
.then(function(data){
console.log('done', data)
})
.progress(function(info){
console.log(info);
})
.then(function(data){
console.log('done', data);
process.exit();
})
{
"name": "tesseract.js",
"version": "1.0.10",
"version": "1.0.11",
"description": "Pure Javascript Multilingual OCR",

@@ -28,2 +28,3 @@ "main": "src/index.js",

"file-type": "^3.8.0",
"isomorphic-fetch": "^2.2.1",
"is-url": "^1.2.2",

@@ -30,0 +31,0 @@ "jpeg-js": "^0.2.0",

# [Tesseract.js](http://tesseract.projectnaptha.com/)
[![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js)
[![Beerpay](https://beerpay.io/naptha/tesseract.js/badge.svg)](https://beerpay.io/naptha/tesseract.js)
[![NPM version][tesseractjs-npm-image]][tesseractjs-npm-url]
[tesseractjs-npm-image]: https://img.shields.io/npm/v/tesseract.js.svg
[tesseractjs-npm-url]: https://npmjs.org/package/tesseract.js
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))
<!-- Under the hood, Tesseract.js wraps [tesseract.js-core](https://github.com/naptha/tesseract.js-core), an [emscripten](https://github.com/kripken/emscripten) port of the [Tesseract OCR Engine](https://github.com/tesseract-ocr/tesseract).
-->
[![fancy demo gif](https://github.com/naptha/tesseract.js/blob/master/demo.gif)](http://tesseract.projectnaptha.com)
[![fancy demo gif](./demo.gif "Demo")](http://tesseract.projectnaptha.com)
Tesseract.js works with script tags, [webpack](https://webpack.js.org/)/[Browserify](http://browserify.org/), and [Node.js](https://nodejs.org/en/). [After you install it](#installation), using it is as simple as
Tesseract.js works with script tags, webpack/browserify, and node. [After you install it](#installation), using it is as simple as
```javascript
```javascript
Tesseract.recognize(myImage)

@@ -22,24 +22,31 @@ .progress(function (p) { console.log('progress', p) })

## Provenance
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine.
# Installation
Tesseract.js works with a `<script>` tag via local copy or cdn, with webpack and browserify via `npm`, and on node via `npm`. [Check out the docs](#docs) for a full treatment of the API.
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack and Browserify via `npm`, and on Node.js via `npm`. [Check out the docs](#docs) for a full treatment of the API.
## &lt;script />
You can simply include Tesseract.js with a cdn like this:
You can simply include Tesseract.js with a CDN like this:
```html
<script src='https://cdn.rawgit.com/naptha/tesseract.js/1.0.10/dist/tesseract.js'></script>
<script src='https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/1.0.11/tesseract.min.js'></script>
```
After including your scripts, the `Tesseract` variable should be defined! You can [head to the docs](#docs) for a full treatment of the API.
After including your scripts, the `Tesseract` variable will be defined globally!
## npm
## Dependency
First:
```shell
> yarn add tesseract.js
```
or
```
> npm install tesseract.js --save
```
> Note: Tesseract.js currently requires node v6.8.0 or greater.
> Note: Tesseract.js currently requires Node.js v6.8.0 or higher.
Then
## Usage
```javascript

@@ -54,16 +61,15 @@ var Tesseract = require('tesseract.js')

You can [head to the docs](#docs) for a full treatment of the API.
# Docs
# Docs
* [Tesseract.recognize(image: ImageLike[, options]) -> [TesseractJob](#tesseractjob)](#tesseractrecognizeimage-imagelike-options---tesseractjob)
* [Tesseract.recognize](#tesseractrecognizeimage-imagelike-options---tesseractjob)
+ [Simple Example](#simple-example)
+ [More Complicated Example](#more-complicated-example)
* [Tesseract.detect(image: ImageLike) -> [TesseractJob](#tesseractjob)](#tesseractdetectimage-imagelike---tesseractjob)
* [Tesseract.detect](#tesseractdetectimage-imagelike---tesseractjob)
* [ImageLike](#imagelike)
* [TesseractJob](#tesseractjob)
+ [TesseractJob.progress(callback: function) -> TesseractJob](#tesseractjobprogresscallback-function---tesseractjob)
+ [TesseractJob.then(callback: function) -> TesseractJob](#tesseractjobthencallback-function---tesseractjob)
+ [TesseractJob.catch(callback: function) -> TesseractJob](#tesseractjoberrorcallback-function---tesseractjob)
+ [TesseractJob.finally(callback: function) -> TesseractJob](#tesseractjobfinallycallback-function---tesseractjob)
+ [TesseractJob.progress](#tesseractjobprogresscallback-function---tesseractjob)
+ [TesseractJob.then](#tesseractjobthencallback-function---tesseractjob)
+ [TesseractJob.catch](#tesseractjobcatchcallback-function---tesseractjob)
+ [TesseractJob.finally](#tesseractjobfinallycallback-function---tesseractjob)
* [Local Installation](#local-installation)

@@ -74,3 +80,3 @@ + [corePath](#corepath)

* [Contributing](#contributing)
+ [Development](#development)
+ [Development](#development)
+ [Building Static Files](#building-static-files)

@@ -81,4 +87,4 @@ + [Send us a Pull Request!](#send-us-a-pull-request)

## Tesseract.recognize(image: [ImageLike](#imagelike)[, options]) -> [TesseractJob](#tesseractjob)
Figures out what words are in `image`, where the words are in `image`, etc.
> Note: `image` should be be sufficiently high resolution.
Figures out what words are in `image`, where the words are in `image`, etc.
> Note: `image` should be sufficiently high resolution.
> Often, the same image will get much better results if you upscale it before calling `recognize`.

@@ -122,3 +128,3 @@

Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `error` and `finally` methods can be used to act on the result of the script.
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `catch` and `finally` methods can be used to act on the result of the script.

@@ -150,3 +156,3 @@

In NodeJS, an image can be
In Node.js, an image can be
- a path to a local image

@@ -159,5 +165,5 @@ - a `Buffer` instance containing a `PNG` or `JPEG` image

A TesseractJob is an an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. It also provides `finally` method, which will be fired regardless of the job fate. One important difference is that these methods return the job itself (to enable chaining) rather than new.
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. It also provides a `finally` method, which will be fired regardless of the job's fate. One important difference is that these methods return the job itself (to enable chaining) rather than a new one.
Typical use is:
Typical use is:
```javascript

@@ -187,6 +193,6 @@ Tesseract.recognize(myImage)

### TesseractJob.progress(callback: function) -> TesseractJob
Sets `callback` as the function that will be called every time the job progresses.
Sets `callback` as the function that will be called every time the job progresses.
- `callback` is a function with the signature `callback(progress)` where `progress` is a json object.
For example:
For example:
```javascript

@@ -197,3 +203,3 @@ Tesseract.recognize(myImage)

The console will show something like:
The console will show something like:
```javascript

@@ -213,7 +219,7 @@ progress is: {loaded_lang_model: "eng", from_cache: true}

### TesseractJob.then(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if and when the job successfully completes.
Sets `callback` as the function that will be called if and when the job successfully completes.
- `callback` is a function with the signature `callback(result)` where `result` is a json object.
For example:
For example:
```javascript

@@ -224,3 +230,3 @@ Tesseract.recognize(myImage)

The console will show something like:
The console will show something like:
```javascript

@@ -243,4 +249,4 @@ result is: {

### TesseractJob.catch(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if the job fails.
- `callback` is a function with the signature `callback(error)` where `error` is a json object.
Sets `callback` as the function that will be called if the job fails.
- `callback` is a function with the signature `callback(error)` where `error` is a json object.

@@ -253,5 +259,5 @@ ### TesseractJob.finally(callback: function) -> TesseractJob

In the browser, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN.
In the browser, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN.
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can use the `Tesseract.create` function which allows you to specify custom paths for workers, languages, and core.
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can use the `Tesseract.create` function which allows you to specify custom paths for workers, languages, and core.

@@ -261,4 +267,4 @@ ```javascript

workerPath: '/path/to/worker.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/',
corePath: 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js',
})

@@ -268,9 +274,9 @@ ```

### corePath
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.rawgit.com/naptha/tesseract.js-core/master/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
### workerPath
A string specifying the location of the [tesseract.worker.js](./dist/tesseract.worker.js) file. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
A string specifying the location of the [worker.js](./dist/worker.js) file. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
### langPath
A string specifying the location of the tesseract language files, with default value 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'. Language file urls are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files.
A string specifying the location of the tesseract language files, with default value 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/'. Language file URLs are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files.

@@ -285,3 +291,3 @@

Then, cd in to the folder, `npm install`, and `npm start`
Then, `cd tesseract.js && npm install && npm start`
```shell

@@ -291,3 +297,3 @@ > cd tesseract.js

... a bunch of npm stuff ...
... a bunch of npm stuff ...

@@ -301,6 +307,6 @@ Starting up http-server, serving ./

Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds tesseract.js and tesseract.worker.js when you change files in the src folder.
Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds `tesseract.js` and `tesseract.worker.js` when you change files in the src folder.
### Building Static Files
After you've cloned the repo and run `npm install` as described in the [Development Section](#development), you can build static library files in the dist folder with
After you've cloned the repo and run `npm install` as described in the [Development Section](#development), you can build static library files in the dist folder with
```shell

@@ -307,0 +313,0 @@ > npm run build

var defaultOptions = {
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
// workerPath: 'https://rawcdn.githack.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/',
}

@@ -12,3 +12,3 @@

var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js'
defaultOptions.workerPath = 'https://rawcdn.githack.com/naptha/tesseract.js/' + version + '/dist/worker.js'
}

@@ -15,0 +15,0 @@

@@ -1,5 +0,7 @@

var latestJob;
var Module;
var base;
var adapter = {};
var latestJob,
Module,
base,
adapter = {},
dump = require('./dump.js'),
desaturate = require('./desaturate.js');

@@ -10,10 +12,10 @@ function dispatchHandlers(packet, send){

jobId: packet.jobId,
status: status,
status,
action: packet.action,
data: data
})
data
});
}
respond.resolve = respond.bind(this, 'resolve')
respond.reject = respond.bind(this, 'reject')
respond.progress = respond.bind(this, 'progress')
respond.resolve = respond.bind(this, 'resolve');
respond.reject = respond.bind(this, 'reject');
respond.progress = respond.bind(this, 'progress');

@@ -24,5 +26,5 @@ latestJob = respond;

if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond)
}else if(packet.action === 'detect'){
handleDetect(packet.payload, respond)
handleRecognize(packet.payload, respond);
} else if (packet.action === 'detect'){
handleDetect(packet.payload, respond);
}

@@ -37,3 +39,3 @@ } catch (err) {

adapter = impl;
}
};

@@ -44,3 +46,3 @@

if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){
if(['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)){
MIN_MEMORY = 167772160;

@@ -57,19 +59,13 @@ }

TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) })
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
},
onRuntimeInitialized() {}
})
});
Module.FS_createPath("/", "tessdata", true, true)
base = new Module.TessBaseAPI()
res.progress({ status: 'initializing tesseract', progress: 1 })
Module.FS_createPath("/", "tessdata", true, true);
base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
}
}
var dump = require('./dump.js')
var desaturate = require('./desaturate.js')
function setImage(Module, base, image){

@@ -82,3 +78,3 @@ var imgbin = desaturate(image),

base.SetImage(Module.wrapPointer(ptr), width, height, 1, width);
base.SetRectangle(0, 0, width, height)
base.SetRectangle(0, 0, width, height);
return ptr;

@@ -88,3 +84,4 @@ }

function loadLanguage(req, res, cb){
var lang = req.options.lang;
var lang = req.options.lang,
langFile = lang + '.traineddata';

@@ -95,7 +92,7 @@ if(!Module._loadedLanguages) Module._loadedLanguages = {};

adapter.getLanguageData(req, res, function(data){
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 })
Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
res.progress({ status: 'loading ' + langFile, progress: 0 });
Module.FS_createDataFile('tessdata', langFile, data, true, false);
Module._loadedLanguages[lang] = true;
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 })
cb()
res.progress({ status: 'loading ' + langFile, progress: 1 });
cb();
})

@@ -107,12 +104,15 @@ }

function handleRecognize(req, res){
handleInit(req, res)
handleInit(req, res);
loadLanguage(req, res, function(){
var lang = req.options.lang;
loadLanguage(req, res, () => {
var options = req.options;
res.progress({ status: 'initializing api', progress: 0 })
base.Init(null, lang)
res.progress({ status: 'initializing api', progress: 0.3 })
function progressUpdate(progress){
res.progress({ status: 'initializing api', progress: progress });
}
var options = req.options;
progressUpdate(0);
base.Init(null, req.options.lang);
progressUpdate(.3);
for (var option in options) {

@@ -124,9 +124,9 @@ if (options.hasOwnProperty(option)) {

res.progress({ status: 'initializing api', progress: 0.6 })
progressUpdate(.6);
var ptr = setImage(Module, base, req.image);
res.progress({ status: 'initializing api', progress: 1 })
progressUpdate(1);
base.Recognize(null)
base.Recognize(null);
var result = dump(Module, base)
var result = dump(Module, base);

@@ -142,38 +142,32 @@ base.End();

function handleDetect(req, res){
handleInit(req, res)
handleInit(req, res);
req.options.lang = 'osd';
loadLanguage(req, res, function(){
loadLanguage(req, res, () => {
base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY);
base.Init(null, 'osd')
base.SetPageSegMode(Module.PSM_OSD_ONLY)
var ptr = setImage(Module, base, req.image);
var ptr = setImage(Module, base, req.image),
results = new Module.OSResults();
var results = new Module.OSResults();
var success = base.DetectOS(results);
if(!success){
if(!base.DetectOS(results)){
base.End();
Module._free(ptr);
res.reject("failed to detect os")
res.reject("Failed to detect OS");
} else {
var charset = results.get_unicharset()
var best = results.get_best_result()
var oid = best.get_orientation_id(),
var best = results.get_best_result(),
oid = best.get_orientation_id(),
sid = best.get_script_id();
var result = {
base.End();
Module._free(ptr);
res.resolve({
tesseract_script_id: sid,
script: charset.get_script_from_script_id(sid),
script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
}
base.End();
Module._free(ptr);
res.resolve(result)
});
}
})
});
}
const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js')
const TesseractJob = require('./common/job');
const objectAssign = require('object-assign');
const version = require('../package.json').version;
function create(workerOptions){
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
function create(workerOptions = {}){
var worker = new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions));
worker.create = create;

@@ -20,21 +18,16 @@ worker.version = version;

this._currentJob = null;
this._queue = []
this._queue = [];
}
recognize(image, options){
recognize(image, options = {}){
return this._delay(job => {
if(typeof options === 'string'){
options = { lang: options };
}else{
options = options || {}
options.lang = options.lang || 'eng';
}
if (typeof options === 'string') options = {lang: options}
options.lang = options.lang || 'eng';
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions })
job._send('recognize', { image, options, workerOptions: this.workerOptions });
})
}
detect(image, options){
options = options || {}
detect(image, options = {}){
return this._delay(job => {
job._send('detect', { image: image, options: options, workerOptions: this.workerOptions })
job._send('detect', { image, options, workerOptions: this.workerOptions });
})

@@ -46,2 +39,4 @@ }

this.worker = null;
this._currentJob = null;
this._queue = [];
}

@@ -54,8 +49,8 @@

this._queue.push(e => {
this._queue.shift()
this._queue.shift();
this._currentJob = job;
fn(job)
})
fn(job);
});
if(!this._currentJob) this._dequeue();
return job
return job;
}

@@ -65,4 +60,4 @@

this._currentJob = null;
if(this._queue.length > 0){
this._queue[0]()
if(this._queue.length){
this._queue[0]();
}

@@ -72,3 +67,2 @@ }

_recv(packet){
if(packet.status === 'resolve' && packet.action === 'recognize'){

@@ -80,3 +74,3 @@ packet.data = circularize(packet.data);

this._currentJob._handle(packet)
}else{
} else {
console.warn('Job ID ' + packet.jobId + ' not known.')

@@ -87,4 +81,2 @@ }

var DefaultTesseract = create()
module.exports = DefaultTesseract
module.exports = create();

@@ -1,18 +0,16 @@

const path = require('path')
const fetch = require('node-fetch')
const isURL = require('is-url')
const fetch = require('isomorphic-fetch'),
isURL = require('is-url'),
fork = require('child_process').fork,
fs = require('fs');
exports.defaultOptions = {
workerPath: path.join(__dirname, 'worker.js'),
langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
workerPath: require('path').join(__dirname, 'worker.js'),
langPath: 'http://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/',
}
const fork = require('child_process').fork;
const fs = require('fs')
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var cp = fork(workerOptions.workerPath);
cp.on('message', function(packet){
instance._recv(packet)
})
cp.on('message', packet => {
instance._recv(packet);
});
return cp;

@@ -22,10 +20,10 @@ }

exports.terminateWorker = function(instance){
instance.worker.kill()
instance.worker.kill();
}
exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, function(img){
packet.payload.image = img
instance.worker.send(packet)
})
loadImage(packet.payload.image, img => {
packet.payload.image = img;
instance.worker.send(packet);
});
}

@@ -37,9 +35,6 @@

if(isURL(image)) {
fetch(image).then(function (resp) {
return resp.buffer();
}).then(function (buffer) {
return loadImage(buffer, cb);
}).catch(function (err) {
return console.error(err);
});
fetch(image)
.then(resp => resp.buffer())
.then(buffer => loadImage(buffer, cb))
.catch(err => console.error(err));
}

@@ -50,8 +45,7 @@

if (err) throw err;
loadImage(buffer, cb)
})
return
}else if(image instanceof Buffer){
var fileType = require('file-type');
var mime = fileType(image).mime
loadImage(buffer, cb);
});
return;
} else if (image instanceof Buffer){
var mime = require('file-type')(image).mime

@@ -74,16 +68,14 @@ if(mime === 'image/png'){

image.data[offset] = pix[0]
image.data[offset + 1] = pix[1]
image.data[offset + 2] = pix[2]
image.data[offset] = pix[0];
image.data[offset + 1] = pix[1];
image.data[offset + 2] = pix[2];
image.data[offset + 3] = pix[3];
}
}
// console.log(image)
loadImage(image, cb)
loadImage(image, cb);
});
return
}else if(mime === 'image/jpeg'){
var jpeg = require('jpeg-js');
loadImage(jpeg.decode(image), cb)
return
return;
} else if (mime === 'image/jpeg'){
loadImage(require('jpeg-js').decode(image), cb);
return;
}

@@ -97,6 +89,6 @@

if(image && image.data && image.data.length && !Array.isArray(image.data)){
image.data = Array.from(image.data)
image.data = Array.from(image.data);
return loadImage(image, cb)
}
cb(image)
}
cb(image);
}
const http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");
path = require("path"),
isURL = require("is-url");

@@ -9,10 +10,15 @@ var langdata = require('../common/langdata.json')

function getLanguageData(req, res, cb){
var lang = req.options.lang;
var langfile = lang + '.traineddata.gz';
var url = req.workerOptions.langPath + langfile;
var lang = req.options.lang,
langfile = lang + '.traineddata.gz';
fs.readFile(lang + '.traineddata', function (err, data) {
// langPath defaults to a URL where languages can be downloaded. If a custom path is specified
// and it is a local path, use that instead
var localPath = isURL(req.workerOptions.langPath) ?
lang + '.traineddata' :
path.join(req.workerOptions.langPath, lang + '.traineddata');
fs.readFile(localPath, function (err, data) {
if(!err) return cb(new Uint8Array(data));
http.get(url, function(stream){
http.get(req.workerOptions.langPath + langfile, stream => {
var received_bytes = 0;

@@ -24,3 +30,3 @@ stream.on('data', function(chunk) {

loaded: received_bytes,
progress: Math.min(1, received_bytes / langdata[lang])
progress: Math.min(1, received_bytes / langdata[lang])
});

@@ -32,4 +38,6 @@

stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata'))
gunzip.on('end', function(){ getLanguageData(req, stream, cb) })
})
gunzip.on('end',() => {
getLanguageData(req, stream, cb)
});
});
});

@@ -39,2 +47,2 @@ }

module.exports = getLanguageData;
module.exports = getLanguageData;

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc