tesseract.js
Advanced tools
Comparing version 1.0.10 to 1.0.11
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ | ||
'use strict'; | ||
/* eslint-disable no-unused-vars */ | ||
var hasOwnProperty = Object.prototype.hasOwnProperty; | ||
var propIsEnumerable = Object.prototype.propertyIsEnumerable; | ||
/**
 * Coerce a value to an object, rejecting null and undefined.
 *
 * Mirrors the argument check of the native `Object.assign`, which refuses a
 * null/undefined target.
 *
 * @param {*} val - Candidate target value.
 * @returns {Object} `val` itself when it is already an object, otherwise its
 *   wrapper object as produced by `Object(val)`.
 * @throws {TypeError} If `val` is null or undefined.
 */
function toObject(val) {
	if (val === null || val === undefined) {
		throw new TypeError('Object.assign cannot be called with null or undefined');
	}

	return Object(val);
}
/**
 * Decide whether the engine's own `Object.assign` can be trusted.
 *
 * Returns false when `Object.assign` is missing, when any of the property
 * enumeration-order probes below fails, or when a probe throws.
 *
 * @returns {boolean} true when the native implementation should be used.
 */
function shouldUseNative() {
	try {
		if (!Object.assign) {
			return false;
		}

		// Detect buggy property enumeration order in older V8 versions.

		// https://bugs.chromium.org/p/v8/issues/detail?id=4118
		// A String wrapper object is required here: the probe checks that the
		// string's own index keys are listed before the added '5' key.
		var test1 = new String('abc');  // eslint-disable-line
		test1[5] = 'de';
		if (Object.getOwnPropertyNames(test1)[0] === '5') {
			return false;
		}

		// https://bugs.chromium.org/p/v8/issues/detail?id=3056
		// Keys must come back in insertion order.
		var test2 = {};
		for (var i = 0; i < 10; i++) {
			test2['_' + String.fromCharCode(i)] = i;
		}
		var order2 = Object.getOwnPropertyNames(test2).map(function (n) {
			return test2[n];
		});
		if (order2.join('') !== '0123456789') {
			return false;
		}

		// https://bugs.chromium.org/p/v8/issues/detail?id=3056
		// Object.assign itself must preserve key order when copying.
		var test3 = {};
		'abcdefghijklmnopqrst'.split('').forEach(function (letter) {
			test3[letter] = letter;
		});
		if (Object.keys(Object.assign({}, test3)).join('') !==
				'abcdefghijklmnopqrst') {
			return false;
		}

		return true;
	} catch (e) {
		// We don't expect any of the above to throw, but better to be safe.
		return false;
	}
}
module.exports = shouldUseNative() ? Object.assign : function (target, source) { | ||
var from; | ||
var to = toObject(target); | ||
var symbols; | ||
for (var s = 1; s < arguments.length; s++) { | ||
from = Object(arguments[s]); | ||
for (var key in from) { | ||
if (hasOwnProperty.call(from, key)) { | ||
to[key] = from[key]; | ||
} | ||
} | ||
if (Object.getOwnPropertySymbols) { | ||
symbols = Object.getOwnPropertySymbols(from); | ||
for (var i = 0; i < symbols.length; i++) { | ||
if (propIsEnumerable.call(from, symbols[i])) { | ||
to[symbols[i]] = from[symbols[i]]; | ||
} | ||
} | ||
} | ||
} | ||
return to; | ||
}; | ||
},{}],2:[function(require,module,exports){ | ||
// shim for using process in browser | ||
@@ -257,3 +172,7 @@ var process = module.exports = {}; | ||
process.emit = noop; | ||
process.prependListener = noop; | ||
process.prependOnceListener = noop; | ||
process.listeners = function (name) { return [] } | ||
process.binding = function (name) { | ||
@@ -269,6 +188,6 @@ throw new Error('process.binding is not supported'); | ||
},{}],3:[function(require,module,exports){ | ||
},{}],2:[function(require,module,exports){ | ||
module.exports={ | ||
"name": "tesseract.js", | ||
"version": "1.0.10", | ||
"version": "1.0.11", | ||
"description": "Pure Javascript Multilingual OCR", | ||
@@ -298,2 +217,3 @@ "main": "src/index.js", | ||
"file-type": "^3.8.0", | ||
"isomorphic-fetch": "^2.2.1", | ||
"is-url": "^1.2.2", | ||
@@ -317,3 +237,3 @@ "jpeg-js": "^0.2.0", | ||
},{}],4:[function(require,module,exports){ | ||
},{}],3:[function(require,module,exports){ | ||
(function (process){ | ||
@@ -323,5 +243,5 @@ 'use strict'; | ||
var defaultOptions = { | ||
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js', | ||
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', | ||
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/' | ||
// workerPath: 'https://rawcdn.githack.com/naptha/tesseract.js/0.2.0/dist/worker.js', | ||
corePath: 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js', | ||
langPath: 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/' | ||
}; | ||
@@ -334,3 +254,3 @@ | ||
var version = require('../../package.json').version; | ||
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js'; | ||
defaultOptions.workerPath = 'https://rawcdn.githack.com/naptha/tesseract.js/' + version + '/dist/worker.js'; | ||
} | ||
@@ -427,3 +347,3 @@ | ||
}).call(this,require('_process')) | ||
},{"../../package.json":3,"_process":2}],5:[function(require,module,exports){ | ||
},{"../../package.json":2,"_process":1}],4:[function(require,module,exports){ | ||
"use strict"; | ||
@@ -495,3 +415,3 @@ | ||
},{}],6:[function(require,module,exports){ | ||
},{}],5:[function(require,module,exports){ | ||
'use strict'; | ||
@@ -607,3 +527,3 @@ | ||
},{"../node/index.js":4}],7:[function(require,module,exports){ | ||
},{"../node/index.js":3}],6:[function(require,module,exports){ | ||
'use strict'; | ||
@@ -618,8 +538,8 @@ | ||
var TesseractJob = require('./common/job'); | ||
var objectAssign = require('object-assign'); | ||
var version = require('../package.json').version; | ||
function create(workerOptions) { | ||
workerOptions = workerOptions || {}; | ||
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)); | ||
function create() { | ||
var workerOptions = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; | ||
var worker = new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions)); | ||
worker.create = create; | ||
@@ -642,12 +562,10 @@ worker.version = version; | ||
key: 'recognize', | ||
value: function recognize(image, options) { | ||
value: function recognize(image) { | ||
var _this = this; | ||
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; | ||
return this._delay(function (job) { | ||
if (typeof options === 'string') { | ||
options = { lang: options }; | ||
} else { | ||
options = options || {}; | ||
options.lang = options.lang || 'eng'; | ||
} | ||
if (typeof options === 'string') options = { lang: options }; | ||
options.lang = options.lang || 'eng'; | ||
@@ -659,6 +577,7 @@ job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions }); | ||
key: 'detect', | ||
value: function detect(image, options) { | ||
value: function detect(image) { | ||
var _this2 = this; | ||
options = options || {}; | ||
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; | ||
return this._delay(function (job) { | ||
@@ -673,2 +592,4 @@ job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions }); | ||
this.worker = null; | ||
this._currentJob = null; | ||
this._queue = []; | ||
} | ||
@@ -695,3 +616,3 @@ }, { | ||
this._currentJob = null; | ||
if (this._queue.length > 0) { | ||
if (this._queue.length) { | ||
this._queue[0](); | ||
@@ -703,3 +624,2 @@ } | ||
value: function _recv(packet) { | ||
if (packet.status === 'resolve' && packet.action === 'recognize') { | ||
@@ -720,7 +640,5 @@ packet.data = circularize(packet.data); | ||
var DefaultTesseract = create(); | ||
module.exports = create(); | ||
module.exports = DefaultTesseract; | ||
},{"../package.json":3,"./common/circularize.js":5,"./common/job":6,"./node/index.js":4,"object-assign":1}]},{},[7])(7) | ||
},{"../package.json":2,"./common/circularize.js":4,"./common/job":5,"./node/index.js":3}]},{},[6])(6) | ||
}); |
@@ -28,2 +28,3 @@ # Tesseract Languages | ||
| 'eus' | Basque | | ||
| 'fas' | Persian (Farsi) | | |
| 'fin' | Finnish | | ||
@@ -30,0 +31,0 @@ | 'fra' | French | |
@@ -1,14 +0,15 @@ | ||
var path = require('path'); | ||
var Tesseract = require('../../') // replace this with require('tesseract.js') | ||
var image = path.resolve(__dirname, 'cosmic.png'); | ||
// replace this with require('tesseract.js') | ||
var Tesseract = require('../../'), | ||
image = require('path').resolve(__dirname, 'cosmic.png'); | ||
Tesseract.recognize(image) | ||
.then(data => { | ||
console.log('then\n', data.text) | ||
}) | ||
.catch(err => { | ||
console.log('catch\n', err); | ||
}) | ||
.finally(e => { | ||
console.log('finally\n'); | ||
}); | ||
.then(data => { | ||
console.log('then\n', data.text) | ||
}) | ||
.catch(err => { | ||
console.log('catch\n', err); | ||
}) | ||
.finally(e => { | ||
console.log('finally\n'); | ||
process.exit(); | ||
}); |
@@ -1,11 +0,12 @@ | ||
var path = require('path'); | ||
var Tesseract = require('../../') // replace this with require('tesseract.js') | ||
var image = path.resolve(__dirname, 'cosmic.png'); | ||
// replace this with require('tesseract.js') | ||
var Tesseract = require('../../'), | ||
image = require('path').resolve(__dirname, 'cosmic.png'); | ||
Tesseract.detect(image) | ||
.progress(function(info){ | ||
console.log(info) | ||
}) | ||
.then(function(data){ | ||
console.log('done', data) | ||
}) | ||
.progress(function(info){ | ||
console.log(info); | ||
}) | ||
.then(function(data){ | ||
console.log('done', data); | ||
process.exit(); | ||
}) |
{ | ||
"name": "tesseract.js", | ||
"version": "1.0.10", | ||
"version": "1.0.11", | ||
"description": "Pure Javascript Multilingual OCR", | ||
@@ -28,2 +28,3 @@ "main": "src/index.js", | ||
"file-type": "^3.8.0", | ||
"isomorphic-fetch": "^2.2.1", | ||
"is-url": "^1.2.2", | ||
@@ -30,0 +31,0 @@ "jpeg-js": "^0.2.0", |
102
README.md
# [Tesseract.js](http://tesseract.projectnaptha.com/) | ||
[![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) | ||
[![Beerpay](https://beerpay.io/naptha/tesseract.js/badge.svg)](https://beerpay.io/naptha/tesseract.js) | ||
[![NPM version][tesseractjs-npm-image]][tesseractjs-npm-url] | ||
[tesseractjs-npm-image]: https://img.shields.io/npm/v/tesseract.js.svg | ||
[tesseractjs-npm-url]: https://npmjs.org/package/tesseract.js | ||
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) | ||
<!-- Under the hood, Tesseract.js wraps [tesseract.js-core](https://github.com/naptha/tesseract.js-core), an [emscripten](https://github.com/kripken/emscripten) port of the [Tesseract OCR Engine](https://github.com/tesseract-ocr/tesseract). | ||
--> | ||
[![fancy demo gif](https://github.com/naptha/tesseract.js/blob/master/demo.gif)](http://tesseract.projectnaptha.com) | ||
[![fancy demo gif](./demo.gif "Demo")](http://tesseract.projectnaptha.com) | ||
Tesseract.js works with script tags, [webpack](https://webpack.js.org/)/[Browserify](http://browserify.org/), and [Node.js](https://nodejs.org/en/). [After you install it](#installation), using it is as simple as | ||
Tesseract.js works with script tags, webpack/browserify, and node. [After you install it](#installation), using it is as simple as | ||
```javascript | ||
```javascript | ||
Tesseract.recognize(myImage) | ||
@@ -22,24 +22,31 @@ .progress(function (p) { console.log('progress', p) }) | ||
## Provenance | ||
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine. | ||
# Installation | ||
Tesseract.js works with a `<script>` tag via local copy or cdn, with webpack and browserify via `npm`, and on node via `npm`. [Check out the docs](#docs) for a full treatment of the API. | ||
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack and Browserify via `npm`, and on Node.js via `npm`. [Check out the docs](#docs) for a full treatment of the API. | ||
## <script /> | ||
You can simply include Tesseract.js with a cdn like this: | ||
You can simply include Tesseract.js with a CDN like this: | ||
```html | ||
<script src='https://cdn.rawgit.com/naptha/tesseract.js/1.0.10/dist/tesseract.js'></script> | ||
<script src='https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/1.0.11/tesseract.min.js'></script> | ||
``` | ||
After including your scripts, the `Tesseract` variable should be defined! You can [head to the docs](#docs) for a full treatment of the API. | ||
After including your scripts, the `Tesseract` variable will be defined globally! | ||
## npm | ||
## Dependency | ||
First: | ||
```shell | ||
> yarn add tesseract.js | ||
``` | ||
or | ||
``` | ||
> npm install tesseract.js --save | ||
``` | ||
> Note: Tesseract.js currently requires node v6.8.0 or greater. | ||
> Note: Tesseract.js currently requires Node.js v6.8.0 or higher. | ||
Then | ||
## Usage | ||
```javascript | ||
@@ -54,16 +61,15 @@ var Tesseract = require('tesseract.js') | ||
You can [head to the docs](#docs) for a full treatment of the API. | ||
# Docs | ||
# Docs | ||
* [Tesseract.recognize(image: ImageLike[, options]) -> [TesseractJob](#tesseractjob)](#tesseractrecognizeimage-imagelike-options---tesseractjob) | ||
* [Tesseract.recognize](#tesseractrecognizeimage-imagelike-options---tesseractjob) | ||
+ [Simple Example](#simple-example) | ||
+ [More Complicated Example](#more-complicated-example) | ||
* [Tesseract.detect(image: ImageLike) -> [TesseractJob](#tesseractjob)](#tesseractdetectimage-imagelike---tesseractjob) | ||
* [Tesseract.detect](#tesseractdetectimage-imagelike---tesseractjob) | ||
* [ImageLike](#imagelike) | ||
* [TesseractJob](#tesseractjob) | ||
+ [TesseractJob.progress(callback: function) -> TesseractJob](#tesseractjobprogresscallback-function---tesseractjob) | ||
+ [TesseractJob.then(callback: function) -> TesseractJob](#tesseractjobthencallback-function---tesseractjob) | ||
+ [TesseractJob.catch(callback: function) -> TesseractJob](#tesseractjoberrorcallback-function---tesseractjob) | ||
+ [TesseractJob.finally(callback: function) -> TesseractJob](#tesseractjobfinallycallback-function---tesseractjob) | ||
+ [TesseractJob.progress](#tesseractjobprogresscallback-function---tesseractjob) | ||
+ [TesseractJob.then](#tesseractjobthencallback-function---tesseractjob) | ||
+ [TesseractJob.catch](#tesseractjobcatchcallback-function---tesseractjob) | ||
+ [TesseractJob.finally](#tesseractjobfinallycallback-function---tesseractjob) | ||
* [Local Installation](#local-installation) | ||
@@ -74,3 +80,3 @@ + [corePath](#corepath) | ||
* [Contributing](#contributing) | ||
+ [Development](#development) | ||
+ [Development](#development) | ||
+ [Building Static Files](#building-static-files) | ||
@@ -81,4 +87,4 @@ + [Send us a Pull Request!](#send-us-a-pull-request) | ||
## Tesseract.recognize(image: [ImageLike](#imagelike)[, options]) -> [TesseractJob](#tesseractjob) | ||
Figures out what words are in `image`, where the words are in `image`, etc. | ||
> Note: `image` should be be sufficiently high resolution. | ||
Figures out what words are in `image`, where the words are in `image`, etc. | ||
> Note: `image` should be sufficiently high resolution. | ||
> Often, the same image will get much better results if you upscale it before calling `recognize`. | ||
@@ -122,3 +128,3 @@ | ||
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `error` and `finally` methods can be used to act on the result of the script. | ||
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `catch` and `finally` methods can be used to act on the result of the script. | ||
@@ -150,3 +156,3 @@ | ||
In NodeJS, an image can be | ||
In Node.js, an image can be | ||
- a path to a local image | ||
@@ -159,5 +165,5 @@ - a `Buffer` instance containing a `PNG` or `JPEG` image | ||
A TesseractJob is an an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. It also provides `finally` method, which will be fired regardless of the job fate. One important difference is that these methods return the job itself (to enable chaining) rather than new. | ||
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. It also provides a `finally` method, which will be fired regardless of the job's fate. One important difference is that these methods return the job itself (to enable chaining) rather than a new object. | |
Typical use is: | ||
Typical use is: | ||
```javascript | ||
@@ -187,6 +193,6 @@ Tesseract.recognize(myImage) | ||
### TesseractJob.progress(callback: function) -> TesseractJob | ||
Sets `callback` as the function that will be called every time the job progresses. | ||
Sets `callback` as the function that will be called every time the job progresses. | ||
- `callback` is a function with the signature `callback(progress)` where `progress` is a json object. | ||
For example: | ||
For example: | ||
```javascript | ||
@@ -197,3 +203,3 @@ Tesseract.recognize(myImage) | ||
The console will show something like: | ||
The console will show something like: | ||
```javascript | ||
@@ -213,7 +219,7 @@ progress is: {loaded_lang_model: "eng", from_cache: true} | ||
### TesseractJob.then(callback: function) -> TesseractJob | ||
Sets `callback` as the function that will be called if and when the job successfully completes. | ||
Sets `callback` as the function that will be called if and when the job successfully completes. | ||
- `callback` is a function with the signature `callback(result)` where `result` is a json object. | ||
For example: | ||
For example: | ||
```javascript | ||
@@ -224,3 +230,3 @@ Tesseract.recognize(myImage) | ||
The console will show something like: | ||
The console will show something like: | ||
```javascript | ||
@@ -243,4 +249,4 @@ result is: { | ||
### TesseractJob.catch(callback: function) -> TesseractJob | ||
Sets `callback` as the function that will be called if the job fails. | ||
- `callback` is a function with the signature `callback(error)` where `error` is a json object. | ||
Sets `callback` as the function that will be called if the job fails. | ||
- `callback` is a function with the signature `callback(error)` where `error` is a json object. | ||
@@ -253,5 +259,5 @@ ### TesseractJob.finally(callback: function) -> TesseractJob | ||
In the browser, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. | ||
In the browser, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. | ||
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can use the `Tesseract.create` function which allows you to specify custom paths for workers, languages, and core. | ||
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can use the `Tesseract.create` function which allows you to specify custom paths for workers, languages, and core. | ||
@@ -261,4 +267,4 @@ ```javascript | ||
workerPath: '/path/to/worker.js', | ||
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', | ||
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', | ||
langPath: 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/', | ||
corePath: 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js', | ||
}) | ||
@@ -268,9 +274,9 @@ ``` | ||
### corePath | ||
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.rawgit.com/naptha/tesseract.js-core/master/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file. | ||
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file. | |
### workerPath | ||
A string specifying the location of the [tesseract.worker.js](./dist/tesseract.worker.js) file. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file. | ||
A string specifying the location of the [worker.js](./dist/worker.js) file. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file. | ||
### langPath | ||
A string specifying the location of the tesseract language files, with default value 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'. Language file urls are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files. | ||
A string specifying the location of the tesseract language files, with default value 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/'. Language file URLs are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files. | ||
@@ -285,3 +291,3 @@ | ||
Then, cd in to the folder, `npm install`, and `npm start` | ||
Then, `cd tesseract.js && npm install && npm start` | ||
```shell | ||
@@ -291,3 +297,3 @@ > cd tesseract.js | ||
... a bunch of npm stuff ... | ||
... a bunch of npm stuff ... | ||
@@ -301,6 +307,6 @@ Starting up http-server, serving ./ | ||
Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds tesseract.js and tesseract.worker.js when you change files in the src folder. | ||
Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds `tesseract.js` and `tesseract.worker.js` when you change files in the src folder. | ||
### Building Static Files | ||
After you've cloned the repo and run `npm install` as described in the [Development Section](#development), you can build static library files in the dist folder with | ||
After you've cloned the repo and run `npm install` as described in the [Development Section](#development), you can build static library files in the dist folder with | ||
```shell | ||
@@ -307,0 +313,0 @@ > npm run build |
var defaultOptions = { | ||
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js', | ||
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', | ||
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', | ||
// workerPath: 'https://rawcdn.githack.com/naptha/tesseract.js/0.2.0/dist/worker.js', | ||
corePath: 'https://rawcdn.githack.com/naptha/tesseract.js-core/0.1.0/index.js', | ||
langPath: 'https://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/', | ||
} | ||
@@ -12,3 +12,3 @@ | ||
var version = require('../../package.json').version; | ||
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js' | ||
defaultOptions.workerPath = 'https://rawcdn.githack.com/naptha/tesseract.js/' + version + '/dist/worker.js' | ||
} | ||
@@ -15,0 +15,0 @@ |
@@ -1,5 +0,7 @@ | ||
var latestJob; | ||
var Module; | ||
var base; | ||
var adapter = {}; | ||
var latestJob, | ||
Module, | ||
base, | ||
adapter = {}, | ||
dump = require('./dump.js'), | ||
desaturate = require('./desaturate.js'); | ||
@@ -10,10 +12,10 @@ function dispatchHandlers(packet, send){ | ||
jobId: packet.jobId, | ||
status: status, | ||
status, | ||
action: packet.action, | ||
data: data | ||
}) | ||
data | ||
}); | ||
} | ||
respond.resolve = respond.bind(this, 'resolve') | ||
respond.reject = respond.bind(this, 'reject') | ||
respond.progress = respond.bind(this, 'progress') | ||
respond.resolve = respond.bind(this, 'resolve'); | ||
respond.reject = respond.bind(this, 'reject'); | ||
respond.progress = respond.bind(this, 'progress'); | ||
@@ -24,5 +26,5 @@ latestJob = respond; | ||
if(packet.action === 'recognize'){ | ||
handleRecognize(packet.payload, respond) | ||
}else if(packet.action === 'detect'){ | ||
handleDetect(packet.payload, respond) | ||
handleRecognize(packet.payload, respond); | ||
} else if (packet.action === 'detect'){ | ||
handleDetect(packet.payload, respond); | ||
} | ||
@@ -37,3 +39,3 @@ } catch (err) { | ||
adapter = impl; | ||
} | ||
}; | ||
@@ -44,3 +46,3 @@ | ||
if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){ | ||
if(['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)){ | ||
MIN_MEMORY = 167772160; | ||
@@ -57,19 +59,13 @@ } | ||
TesseractProgress(percent){ | ||
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }) | ||
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); | ||
}, | ||
onRuntimeInitialized() {} | ||
}) | ||
}); | ||
Module.FS_createPath("/", "tessdata", true, true) | ||
base = new Module.TessBaseAPI() | ||
res.progress({ status: 'initializing tesseract', progress: 1 }) | ||
Module.FS_createPath("/", "tessdata", true, true); | ||
base = new Module.TessBaseAPI(); | ||
res.progress({ status: 'initializing tesseract', progress: 1 }); | ||
} | ||
} | ||
var dump = require('./dump.js') | ||
var desaturate = require('./desaturate.js') | ||
function setImage(Module, base, image){ | ||
@@ -82,3 +78,3 @@ var imgbin = desaturate(image), | ||
base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); | ||
base.SetRectangle(0, 0, width, height) | ||
base.SetRectangle(0, 0, width, height); | ||
return ptr; | ||
@@ -88,3 +84,4 @@ } | ||
function loadLanguage(req, res, cb){ | ||
var lang = req.options.lang; | ||
var lang = req.options.lang, | ||
langFile = lang + '.traineddata'; | ||
@@ -95,7 +92,7 @@ if(!Module._loadedLanguages) Module._loadedLanguages = {}; | ||
adapter.getLanguageData(req, res, function(data){ | ||
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 }) | ||
Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); | ||
res.progress({ status: 'loading ' + langFile, progress: 0 }); | ||
Module.FS_createDataFile('tessdata', langFile, data, true, false); | ||
Module._loadedLanguages[lang] = true; | ||
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 }) | ||
cb() | ||
res.progress({ status: 'loading ' + langFile, progress: 1 }); | ||
cb(); | ||
}) | ||
@@ -107,12 +104,15 @@ } | ||
function handleRecognize(req, res){ | ||
handleInit(req, res) | ||
handleInit(req, res); | ||
loadLanguage(req, res, function(){ | ||
var lang = req.options.lang; | ||
loadLanguage(req, res, () => { | ||
var options = req.options; | ||
res.progress({ status: 'initializing api', progress: 0 }) | ||
base.Init(null, lang) | ||
res.progress({ status: 'initializing api', progress: 0.3 }) | ||
function progressUpdate(progress){ | ||
res.progress({ status: 'initializing api', progress: progress }); | ||
} | ||
var options = req.options; | ||
progressUpdate(0); | ||
base.Init(null, req.options.lang); | ||
progressUpdate(.3); | ||
for (var option in options) { | ||
@@ -124,9 +124,9 @@ if (options.hasOwnProperty(option)) { | ||
res.progress({ status: 'initializing api', progress: 0.6 }) | ||
progressUpdate(.6); | ||
var ptr = setImage(Module, base, req.image); | ||
res.progress({ status: 'initializing api', progress: 1 }) | ||
progressUpdate(1); | ||
base.Recognize(null) | ||
base.Recognize(null); | ||
var result = dump(Module, base) | ||
var result = dump(Module, base); | ||
@@ -142,38 +142,32 @@ base.End(); | ||
function handleDetect(req, res){ | ||
handleInit(req, res) | ||
handleInit(req, res); | ||
req.options.lang = 'osd'; | ||
loadLanguage(req, res, function(){ | ||
loadLanguage(req, res, () => { | ||
base.Init(null, 'osd'); | ||
base.SetPageSegMode(Module.PSM_OSD_ONLY); | ||
base.Init(null, 'osd') | ||
base.SetPageSegMode(Module.PSM_OSD_ONLY) | ||
var ptr = setImage(Module, base, req.image); | ||
var ptr = setImage(Module, base, req.image), | ||
results = new Module.OSResults(); | ||
var results = new Module.OSResults(); | ||
var success = base.DetectOS(results); | ||
if(!success){ | ||
if(!base.DetectOS(results)){ | ||
base.End(); | ||
Module._free(ptr); | ||
res.reject("failed to detect os") | ||
res.reject("Failed to detect OS"); | ||
} else { | ||
var charset = results.get_unicharset() | ||
var best = results.get_best_result() | ||
var oid = best.get_orientation_id(), | ||
var best = results.get_best_result(), | ||
oid = best.get_orientation_id(), | ||
sid = best.get_script_id(); | ||
var result = { | ||
base.End(); | ||
Module._free(ptr); | ||
res.resolve({ | ||
tesseract_script_id: sid, | ||
script: charset.get_script_from_script_id(sid), | ||
script: results.get_unicharset().get_script_from_script_id(sid), | ||
script_confidence: best.get_sconfidence(), | ||
orientation_degrees: [0, 270, 180, 90][oid], | ||
orientation_confidence: best.get_oconfidence() | ||
} | ||
base.End(); | ||
Module._free(ptr); | ||
res.resolve(result) | ||
}); | ||
} | ||
}) | ||
}); | ||
} |
const adapter = require('./node/index.js') | ||
const circularize = require('./common/circularize.js') | ||
const TesseractJob = require('./common/job'); | ||
const objectAssign = require('object-assign'); | ||
const version = require('../package.json').version; | ||
function create(workerOptions){ | ||
workerOptions = workerOptions || {}; | ||
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)) | ||
function create(workerOptions = {}){ | ||
var worker = new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions)); | ||
worker.create = create; | ||
@@ -20,21 +18,16 @@ worker.version = version; | ||
this._currentJob = null; | ||
this._queue = [] | ||
this._queue = []; | ||
} | ||
recognize(image, options){ | ||
recognize(image, options = {}){ | ||
return this._delay(job => { | ||
if(typeof options === 'string'){ | ||
options = { lang: options }; | ||
}else{ | ||
options = options || {} | ||
options.lang = options.lang || 'eng'; | ||
} | ||
if (typeof options === 'string') options = {lang: options} | ||
options.lang = options.lang || 'eng'; | ||
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions }) | ||
job._send('recognize', { image, options, workerOptions: this.workerOptions }); | ||
}) | ||
} | ||
detect(image, options){ | ||
options = options || {} | ||
detect(image, options = {}){ | ||
return this._delay(job => { | ||
job._send('detect', { image: image, options: options, workerOptions: this.workerOptions }) | ||
job._send('detect', { image, options, workerOptions: this.workerOptions }); | ||
}) | ||
@@ -46,2 +39,4 @@ } | ||
this.worker = null; | ||
this._currentJob = null; | ||
this._queue = []; | ||
} | ||
@@ -54,8 +49,8 @@ | ||
this._queue.push(e => { | ||
this._queue.shift() | ||
this._queue.shift(); | ||
this._currentJob = job; | ||
fn(job) | ||
}) | ||
fn(job); | ||
}); | ||
if(!this._currentJob) this._dequeue(); | ||
return job | ||
return job; | ||
} | ||
@@ -65,4 +60,4 @@ | ||
this._currentJob = null; | ||
if(this._queue.length > 0){ | ||
this._queue[0]() | ||
if(this._queue.length){ | ||
this._queue[0](); | ||
} | ||
@@ -72,3 +67,2 @@ } | ||
_recv(packet){ | ||
if(packet.status === 'resolve' && packet.action === 'recognize'){ | ||
@@ -80,3 +74,3 @@ packet.data = circularize(packet.data); | ||
this._currentJob._handle(packet) | ||
}else{ | ||
} else { | ||
console.warn('Job ID ' + packet.jobId + ' not known.') | ||
@@ -87,4 +81,2 @@ } | ||
var DefaultTesseract = create() | ||
module.exports = DefaultTesseract | ||
module.exports = create(); |
@@ -1,18 +0,16 @@ | ||
const path = require('path') | ||
const fetch = require('node-fetch') | ||
const isURL = require('is-url') | ||
const fetch = require('isomorphic-fetch'), | ||
isURL = require('is-url'), | ||
fork = require('child_process').fork, | ||
fs = require('fs'); | ||
exports.defaultOptions = { | ||
workerPath: path.join(__dirname, 'worker.js'), | ||
langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', | ||
workerPath: require('path').join(__dirname, 'worker.js'), | ||
langPath: 'http://rawcdn.githack.com/naptha/tessdata/gh-pages/3.02/', | ||
} | ||
const fork = require('child_process').fork; | ||
const fs = require('fs') | ||
exports.spawnWorker = function spawnWorker(instance, workerOptions){ | ||
var cp = fork(workerOptions.workerPath); | ||
cp.on('message', function(packet){ | ||
instance._recv(packet) | ||
}) | ||
cp.on('message', packet => { | ||
instance._recv(packet); | ||
}); | ||
return cp; | ||
@@ -22,10 +20,10 @@ } | ||
exports.terminateWorker = function(instance){ | ||
instance.worker.kill() | ||
instance.worker.kill(); | ||
} | ||
exports.sendPacket = function sendPacket(instance, packet){ | ||
loadImage(packet.payload.image, function(img){ | ||
packet.payload.image = img | ||
instance.worker.send(packet) | ||
}) | ||
loadImage(packet.payload.image, img => { | ||
packet.payload.image = img; | ||
instance.worker.send(packet); | ||
}); | ||
} | ||
@@ -37,9 +35,6 @@ | ||
if(isURL(image)) { | ||
fetch(image).then(function (resp) { | ||
return resp.buffer(); | ||
}).then(function (buffer) { | ||
return loadImage(buffer, cb); | ||
}).catch(function (err) { | ||
return console.error(err); | ||
}); | ||
fetch(image) | ||
.then(resp => resp.buffer()) | ||
.then(buffer => loadImage(buffer, cb)) | ||
.catch(err => console.error(err)); | ||
} | ||
@@ -50,8 +45,7 @@ | ||
if (err) throw err; | ||
loadImage(buffer, cb) | ||
}) | ||
return | ||
}else if(image instanceof Buffer){ | ||
var fileType = require('file-type'); | ||
var mime = fileType(image).mime | ||
loadImage(buffer, cb); | ||
}); | ||
return; | ||
} else if (image instanceof Buffer){ | ||
var mime = require('file-type')(image).mime | ||
@@ -74,16 +68,14 @@ if(mime === 'image/png'){ | ||
image.data[offset] = pix[0] | ||
image.data[offset + 1] = pix[1] | ||
image.data[offset + 2] = pix[2] | ||
image.data[offset] = pix[0]; | ||
image.data[offset + 1] = pix[1]; | ||
image.data[offset + 2] = pix[2]; | ||
image.data[offset + 3] = pix[3]; | ||
} | ||
} | ||
// console.log(image) | ||
loadImage(image, cb) | ||
loadImage(image, cb); | ||
}); | ||
return | ||
}else if(mime === 'image/jpeg'){ | ||
var jpeg = require('jpeg-js'); | ||
loadImage(jpeg.decode(image), cb) | ||
return | ||
return; | ||
} else if (mime === 'image/jpeg'){ | ||
loadImage(require('jpeg-js').decode(image), cb); | ||
return; | ||
} | ||
@@ -97,6 +89,6 @@ | ||
if(image && image.data && image.data.length && !Array.isArray(image.data)){ | ||
image.data = Array.from(image.data) | ||
image.data = Array.from(image.data); | ||
return loadImage(image, cb) | ||
} | ||
cb(image) | ||
} | ||
cb(image); | ||
} |
const http = require("http"), | ||
zlib = require("zlib"), | ||
fs = require("fs"), | ||
path = require("path"); | ||
path = require("path"), | ||
isURL = require("is-url"); | ||
@@ -9,10 +10,15 @@ var langdata = require('../common/langdata.json') | ||
function getLanguageData(req, res, cb){ | ||
var lang = req.options.lang; | ||
var langfile = lang + '.traineddata.gz'; | ||
var url = req.workerOptions.langPath + langfile; | ||
var lang = req.options.lang, | ||
langfile = lang + '.traineddata.gz'; | ||
fs.readFile(lang + '.traineddata', function (err, data) { | ||
// langPath defaults to a URL where languages can be downloaded. If a custom path is specified | ||
// and it is a local path, use that instead | ||
var localPath = isURL(req.workerOptions.langPath) ? | ||
lang + '.traineddata' : | ||
path.join(req.workerOptions.langPath, lang + '.traineddata'); | ||
fs.readFile(localPath, function (err, data) { | ||
if(!err) return cb(new Uint8Array(data)); | ||
http.get(url, function(stream){ | ||
http.get(req.workerOptions.langPath + langfile, stream => { | ||
var received_bytes = 0; | ||
@@ -24,3 +30,3 @@ stream.on('data', function(chunk) { | ||
loaded: received_bytes, | ||
progress: Math.min(1, received_bytes / langdata[lang]) | ||
progress: Math.min(1, received_bytes / langdata[lang]) | ||
}); | ||
@@ -32,4 +38,6 @@ | ||
stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata')) | ||
gunzip.on('end', function(){ getLanguageData(req, stream, cb) }) | ||
}) | ||
gunzip.on('end',() => { | ||
getLanguageData(req, stream, cb) | ||
}); | ||
}); | ||
}); | ||
@@ -39,2 +47,2 @@ } | ||
module.exports = getLanguageData; | ||
module.exports = getLanguageData; |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
9203
300
13
701004
9
28
4
+ Added isomorphic-fetch@^2.2.1
+ Added isomorphic-fetch@2.2.1 (transitive)
+ Added whatwg-fetch@3.6.20 (transitive)