Socket
Socket
Sign inDemoInstall

tesseract.js

Package Overview
Dependencies
13
Maintainers
2
Versions
67
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.0.3 to 1.0.5

LICENSE.md

254

dist/tesseract.js

@@ -269,2 +269,46 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){

},{}],3:[function(require,module,exports){
module.exports={
"name": "tesseract.js",
"version": "1.0.5",
"description": "Pure Javascript Multilingual OCR",
"main": "src/index.js",
"scripts": {
"test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js",
"release": "git tag `jq -r '.version' package.json`"
},
"browser": {
"./src/node/index.js": "./src/browser/index.js"
},
"author": "",
"license": "Apache",
"devDependencies": {
"babel-preset-es2015": "^6.16.0",
"babelify": "^7.3.0",
"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",
"watchify": "^3.7.0",
"pako": "^1.0.3"
},
"dependencies": {
"file-type": "^3.8.0",
"jpeg-js": "^0.2.0",
"level-js": "^2.2.4",
"object-assign": "^4.1.0",
"png.js": "^0.2.1",
"tesseract.js-core": "^1.0.2"
},
"repository": {
"type": "git",
"url": "https://github.com/naptha/tesseract.js.git"
},
"bugs": {
"url": "https://github.com/naptha/tesseract.js/issues"
},
"homepage": "https://github.com/naptha/tesseract.js"
}
},{}],4:[function(require,module,exports){
(function (process){

@@ -274,3 +318,3 @@ 'use strict';

var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',

@@ -283,2 +327,5 @@ langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'

defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js';
} else {
var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js';
}

@@ -375,3 +422,3 @@

}).call(this,require('_process'))
},{"_process":2}],4:[function(require,module,exports){
},{"../../package.json":3,"_process":2}],5:[function(require,module,exports){
"use strict";

@@ -443,3 +490,3 @@

},{}],5:[function(require,module,exports){
},{}],6:[function(require,module,exports){
'use strict';

@@ -451,5 +498,118 @@

var adapter = require('../node/index.js');
var jobCounter = 0;
module.exports = function () {
function TesseractJob(instance) {
_classCallCheck(this, TesseractJob);
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
this._instance = instance;
this._resolve = [];
this._reject = [];
this._progress = [];
this._finally = [];
}
_createClass(TesseractJob, [{
key: 'then',
value: function then(resolve, reject) {
if (this._resolve.push) {
this._resolve.push(resolve);
} else {
resolve(this._resolve);
}
if (reject) this.catch(reject);
return this;
}
}, {
key: 'catch',
value: function _catch(reject) {
if (this._reject.push) {
this._reject.push(reject);
} else {
reject(this._reject);
}
return this;
}
}, {
key: 'progress',
value: function progress(fn) {
this._progress.push(fn);
return this;
}
}, {
key: 'finally',
value: function _finally(fn) {
this._finally.push(fn);
return this;
}
}, {
key: '_send',
value: function _send(action, payload) {
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
});
}
}, {
key: '_handle',
value: function _handle(packet) {
var data = packet.data;
var runFinallyCbs = false;
if (packet.status === 'resolve') {
if (this._resolve.length === 0) console.debug(data);
this._resolve.forEach(function (fn) {
var ret = fn(data);
if (ret && typeof ret.then == 'function') {
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
}
});
this._resolve = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'reject') {
if (this._reject.length === 0) console.error(data);
this._reject.forEach(function (fn) {
return fn(data);
});
this._reject = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'progress') {
this._progress.forEach(function (fn) {
return fn(data);
});
} else {
console.warn('Message type unknown', packet.status);
}
if (runFinallyCbs) {
this._finally.forEach(function (fn) {
return fn(data);
});
}
}
}]);
return TesseractJob;
}();
},{"../node/index.js":4}],7:[function(require,module,exports){
'use strict';
/**
 * Class-building helper: installs prototype and static members on a
 * constructor via Object.defineProperty and returns the constructor.
 *
 * Each member is a property descriptor carrying an extra `key` field. The
 * descriptor objects are updated in place: `enumerable` defaults to false,
 * `configurable` is forced to true, and `writable` is set to true whenever
 * the descriptor has a `value`.
 */
var _createClass = (function () {
  // Normalise one descriptor and define it on `target` under `member.key`.
  function installMember(target, member) {
    member.enumerable = member.enumerable || false;
    member.configurable = true;
    if ("value" in member) {
      member.writable = true;
    }
    Object.defineProperty(target, member.key, member);
  }

  // Define every descriptor of `members` on `target`, in index order.
  function installAll(target, members) {
    var index = 0;
    while (index < members.length) {
      installMember(target, members[index]);
      index += 1;
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      installAll(Constructor.prototype, protoProps);
    }
    if (staticProps) {
      installAll(Constructor, staticProps);
    }
    return Constructor;
  };
})();
/**
 * Guards a class constructor against being invoked without `new`.
 * Throws a TypeError unless `instance` is an instance of `Constructor`.
 */
function _classCallCheck(instance, Constructor) {
  var calledWithNew = instance instanceof Constructor;
  if (calledWithNew) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var TesseractJob = require('./common/job');
var objectAssign = require('object-assign');
var version = require('../package.json').version;

@@ -460,2 +620,3 @@ function create(workerOptions) {

worker.create = create;
worker.version = version;
return worker;

@@ -549,87 +710,2 @@ }

// Monotonic counter used to build human-readable job ids.
var jobCounter = 0;

/**
 * TesseractJob: a promise-like handle for a single request sent to a worker.
 *
 * `then`, `catch` and `progress` all return the job itself, so calls chain on
 * the same object instead of producing new promises.
 *
 * Settlement is tracked explicitly in `_status` / `_data` rather than by
 * replacing the callback arrays with the result, so late `then` / `catch`
 * registration works for any payload (including `null`, `undefined` and
 * arrays, which the previous `.push` sniffing mishandled).
 */
var TesseractJob = function () {
  /**
   * @param {Object} instance Owner of the job. The job calls
   *   `instance._dequeue()` once it settles and passes `instance` to
   *   `adapter.sendPacket` when sending.
   */
  function TesseractJob(instance) {
    _classCallCheck(this, TesseractJob);

    this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
    this._instance = instance;
    this._resolve = [];
    this._reject = [];
    this._progress = [];
    // 'pending' until the first 'resolve' or 'reject' packet is handled.
    this._status = 'pending';
    // Payload of the settling packet; only meaningful once settled.
    this._data = undefined;
  }

  _createClass(TesseractJob, [{
    key: 'then',

    /**
     * Registers a success callback and, optionally, a failure callback.
     * If the job already resolved, `resolve` is invoked immediately.
     * A non-function `resolve` (e.g. `then(undefined, onError)`) is ignored.
     * @returns {TesseractJob} this job, for chaining.
     */
    value: function then(resolve, reject) {
      if (typeof resolve === 'function') {
        if (this._status === 'resolve') {
          resolve(this._data);
        } else {
          this._resolve.push(resolve);
        }
      }
      if (reject) this.catch(reject);
      return this;
    }
  }, {
    key: 'catch',

    /**
     * Registers a failure callback. If the job already rejected, the
     * callback is invoked immediately with the rejection payload.
     * @returns {TesseractJob} this job, for chaining.
     */
    value: function _catch(reject) {
      if (this._status === 'reject') {
        reject(this._data);
      } else {
        this._reject.push(reject);
      }
      return this;
    }
  }, {
    key: 'progress',

    /**
     * Registers a callback invoked for every 'progress' packet.
     * @returns {TesseractJob} this job, for chaining.
     */
    value: function progress(fn) {
      this._progress.push(fn);
      return this;
    }
  }, {
    key: '_send',

    /** Sends `{ jobId, action, payload }` through the adapter for this job's instance. */
    value: function _send(action, payload) {
      adapter.sendPacket(this._instance, {
        jobId: this.id,
        action: action,
        payload: payload
      });
    }
  }, {
    key: '_handle',

    /**
     * Dispatches an incoming packet to the registered callbacks.
     * `packet.status` is one of 'resolve', 'reject' or 'progress'; anything
     * else is reported with a warning. A job settles at most once: further
     * 'resolve' / 'reject' packets are ignored with a warning.
     */
    value: function _handle(packet) {
      var data = packet.data;
      var status = packet.status;

      if (status === 'progress') {
        this._progress.forEach(function (fn) {
          return fn(data);
        });
        return;
      }

      if (status !== 'resolve' && status !== 'reject') {
        console.warn('Message type unknown', packet.status);
        return;
      }

      if (this._status !== 'pending') {
        console.warn('TesseractJob ' + this.id + ' is already settled; ignoring packet', status);
        return;
      }

      // Record the outcome first so callbacks registered from inside a
      // callback observe the settled state.
      this._status = status;
      this._data = data;

      if (status === 'resolve') {
        // Nobody is listening: surface the result on the console instead.
        if (this._resolve.length === 0) console.debug(data);
        this._resolve.forEach(function (fn) {
          var ret = fn(data);
          if (ret && typeof ret.then == 'function') {
            console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
          }
        });
      } else {
        // Nobody is listening: surface the error on the console instead.
        if (this._reject.length === 0) console.error(data);
        this._reject.forEach(function (fn) {
          return fn(data);
        });
      }

      this._instance._dequeue();
    }
  }]);

  return TesseractJob;
}();
var DefaultTesseract = create();

@@ -639,3 +715,3 @@

},{"./common/circularize.js":4,"./node/index.js":3,"object-assign":1}]},{},[5])(5)
},{"../package.json":3,"./common/circularize.js":5,"./common/job":6,"./node/index.js":4,"object-assign":1}]},{},[7])(7)
});

@@ -21,3 +21,3 @@ # Tesseract Languages

| 'ell' | Greek |
| 'eng' | Enlish |
| 'eng' | English |
| 'enm' | English (Old) |

@@ -24,0 +24,0 @@ | 'epo' | Esperanto |

@@ -0,6 +1,14 @@

var path = require('path');
var Tesseract = require('../../') // replace this with require('tesseract.js')
var image = path.resolve(__dirname, 'cosmic.png');
Tesseract.recognize('cosmic.png')
.then(function(data){
console.log(data.text)
})
Tesseract.recognize(image)
.then(data => {
console.log('then\n', data.text)
})
.catch(err => {
console.log('catch\n', err);
})
.finally(e => {
console.log('finally\n');
});

@@ -0,4 +1,6 @@

var path = require('path');
var Tesseract = require('../../') // replace this with require('tesseract.js')
var image = path.resolve(__dirname, 'cosmic.png');
Tesseract.detect('cosmic.png')
Tesseract.detect(image)
.progress(function(info){

@@ -5,0 +7,0 @@ console.log(info)

{
"name": "tesseract.js",
"version": "1.0.3",
"version": "1.0.5",
"description": "Pure Javascript Multilingual OCR",

@@ -9,3 +9,4 @@ "main": "src/index.js",

"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js"
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js",
"release": "git tag `jq -r '.version' package.json` && git push origin --tags && npm publish"
},

@@ -16,3 +17,3 @@ "browser": {

"author": "",
"license": "MIT",
"license": "Apache",
"devDependencies": {

@@ -19,0 +20,0 @@ "babel-preset-es2015": "^6.16.0",

@@ -5,3 +5,3 @@ # [Tesseract.js](http://tesseract.projectnaptha.com/)

Tesseract.js is a javascript library that gets words in [almost any language](./tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))

@@ -40,3 +40,5 @@ <!-- Under the hood, Tesseract.js wraps [tesseract.js-core](https://github.com/naptha/tesseract.js-core), an [emscripten](https://github.com/kripken/emscripten) port of the [Tesseract OCR Engine](https://github.com/tesseract-ocr/tesseract).

```
> Note: Tesseract.js currently requires node v6.8.0 or greater.
Then

@@ -65,3 +67,4 @@ ```javascript

+ [TesseractJob.catch(callback: function) -> TesseractJob](#tesseractjoberrorcallback-function---tesseractjob)
* [Tesseract Configuration](#tesseract-configuration)
+ [TesseractJob.finally(callback: function) -> TesseractJob](#tesseractjobfinallycallback-function---tesseractjob)
* [Local Installation](#local-installation)
+ [corePath](#corepath)

@@ -77,9 +80,12 @@ + [workerPath](#workerpath)

## Tesseract.recognize(image: [ImageLike](#imagelike)[, options]) -> [TesseractJob](#tesseractjob)
Figures out what words are in `image`, where the words are in `image`, etc.
Figures out what words are in `image`, where the words are in `image`, etc.
> Note: `image` should be of sufficiently high resolution.
> Often, the same image will get much better results if you upscale it before calling `recognize`.
- `image` is any [ImageLike](#imagelike) object.
- `options` is either absent (in which case it is interpreted as `'eng'`), a string specifing a language short code from the [language list](./tesseract_lang_list.md), or a flat json object that may:
+ include properties that override some subset of the [default tesseract parameters](./tesseract_parameters.md)
+ include a `lang` property with a value from the [list of lang parameters](./tesseract_lang_list.md)
- `options` is either absent (in which case it is interpreted as `'eng'`), a string specifying a language short code from the [language list](./docs/tesseract_lang_list.md), or a flat json object that may:
+ include properties that override some subset of the [default tesseract parameters](./docs/tesseract_parameters.md)
+ include a `lang` property with a value from the [list of lang parameters](./docs/tesseract_lang_list.md)
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, and `catch` methods can be used to act on the result.
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `catch` and `finally` methods can be used to act on the result.

@@ -115,3 +121,3 @@ ### Simple Example:

Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, and `error` methods can be used to act on the result of the script.
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `catch` and `finally` methods can be used to act on the result of the script.

@@ -151,3 +157,3 @@

A TesseractJob is an an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. One important difference is that these methods return the job itself (to enable chaining) rather than new.
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. It also provides a `finally` method, which is called regardless of the job's outcome. One important difference is that these methods return the job itself (to enable chaining) rather than a new promise.

@@ -157,5 +163,6 @@ Typical use is:

Tesseract.recognize(myImage)
.progress(function(message){console.log(message)})
.catch(function(err){console.error(err)})
.then(function(result){console.log(result)})
.progress(message => console.log(message))
.catch(err => console.error(err))
.then(result => console.log(result))
.finally(resultOrError => console.log(resultOrError))
```

@@ -167,7 +174,9 @@

job1.progress(function(message){console.log(message)});
job1.progress(message => console.log(message));
job1.catch(function(err){console.error(err)});
job1.catch(err => console.error(err));
job1.then(function(result){console.log(result)})
job1.then(result => console.log(result));
job1.finally(resultOrError => console.log(resultOrError));
```

@@ -209,3 +218,3 @@

Tesseract.recognize(myImage)
.then(function(result){console.log('result is: 'result)})
.then(function(result){console.log('result is: ', result)})
```

@@ -215,3 +224,3 @@

```javascript
progress is: {
result is: {
blocks: Array[1]

@@ -233,6 +242,14 @@ confidence: 87

Sets `callback` as the function that will be called if the job fails.
- `callback` is a function with the signature `callback(erros)` where `error` is a json object.
- `callback` is a function with the signature `callback(error)` where `error` is a json object.
## Tesseract Configuration
### TesseractJob.finally(callback: function) -> TesseractJob
Sets `callback` as the function that will be called regardless of whether the job fails or succeeds.
- `callback` is a function with the signature `callback(resultOrError)` where `resultOrError` is a json object.
## Local Installation
In the browser, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN.
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can use the `Tesseract.create` function which allows you to specify custom paths for workers, languages, and core.
```javascript

@@ -239,0 +256,0 @@ window.Tesseract = Tesseract.create({

var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
// workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',

@@ -10,2 +10,5 @@ langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',

defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js'
}else{
var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js'
}

@@ -12,0 +15,0 @@

@@ -21,6 +21,10 @@ var latestJob;

if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond)
}else if(packet.action === 'detect'){
handleDetect(packet.payload, respond)
try {
if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond)
}else if(packet.action === 'detect'){
handleDetect(packet.payload, respond)
}
} catch (err) {
respond.reject(err)
}

@@ -36,8 +40,15 @@ }

function handleInit(req, res){
if(!Module){
var MIN_MEMORY = 100663296;
if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){
MIN_MEMORY = 167772160;
}
if(!Module || Module.TOTAL_MEMORY < MIN_MEMORY){
var Core = adapter.getCore(req, res);
res.progress({ status: 'initializing tesseract', progress: 0 })
Module = Core({
TOTAL_MEMORY: req.memory,
TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){

@@ -48,2 +59,3 @@ latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) })

})
Module.FS_createPath("/", "tessdata", true, true)

@@ -50,0 +62,0 @@ base = new Module.TessBaseAPI()

const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js')
const TesseractJob = require('./common/job');
const objectAssign = require('object-assign');
const version = require('../package.json').version;

@@ -9,2 +11,3 @@ function create(workerOptions){

worker.create = create;
worker.version = version;
return worker;

@@ -79,73 +82,4 @@ }

// Monotonic counter used to build human-readable job ids.
let jobCounter = 0;

/**
 * TesseractJob: a promise-like handle for a single request sent to a worker.
 *
 * `then`, `catch` and `progress` all return the job itself, so calls chain on
 * the same object instead of producing new promises.
 *
 * Settlement is tracked explicitly in `_status` / `_data` rather than by
 * replacing the callback arrays with the result, so late `then` / `catch`
 * registration works for any payload (including `null`, `undefined` and
 * arrays, which the previous `.push` sniffing mishandled).
 */
class TesseractJob {
  /**
   * @param {Object} instance Owner of the job. The job calls
   *   `instance._dequeue()` once it settles and passes `instance` to
   *   `adapter.sendPacket` when sending.
   */
  constructor(instance){
    this.id = 'Job-' + (++jobCounter) + '-' + Math.random().toString(16).slice(3, 8);
    this._instance = instance;
    this._resolve = [];
    this._reject = [];
    this._progress = [];
    // 'pending' until the first 'resolve' or 'reject' packet is handled.
    this._status = 'pending';
    // Payload of the settling packet; only meaningful once settled.
    this._data = undefined;
  }

  /**
   * Registers a success callback and, optionally, a failure callback.
   * If the job already resolved, `resolve` is invoked immediately.
   * A non-function `resolve` (e.g. `then(undefined, onError)`) is ignored.
   * @returns {TesseractJob} this job, for chaining.
   */
  then(resolve, reject){
    if(typeof resolve === 'function'){
      if(this._status === 'resolve'){
        resolve(this._data);
      }else{
        this._resolve.push(resolve);
      }
    }
    if(reject) this.catch(reject);
    return this;
  }

  /**
   * Registers a failure callback. If the job already rejected, the callback
   * is invoked immediately with the rejection payload.
   * @returns {TesseractJob} this job, for chaining.
   */
  catch(reject){
    if(this._status === 'reject'){
      reject(this._data);
    }else{
      this._reject.push(reject);
    }
    return this;
  }

  /**
   * Registers a callback invoked for every 'progress' packet.
   * @returns {TesseractJob} this job, for chaining.
   */
  progress(fn){
    this._progress.push(fn);
    return this;
  }

  /** Sends `{ jobId, action, payload }` through the adapter for this job's instance. */
  _send(action, payload){
    adapter.sendPacket(this._instance, {
      jobId: this.id,
      action: action,
      payload: payload
    });
  }

  /**
   * Dispatches an incoming packet to the registered callbacks.
   * `packet.status` is one of 'resolve', 'reject' or 'progress'; anything
   * else is reported with a warning. A job settles at most once: further
   * 'resolve' / 'reject' packets are ignored with a warning.
   */
  _handle(packet){
    const data = packet.data;
    const status = packet.status;

    if(status === 'progress'){
      this._progress.forEach(fn => fn(data));
      return;
    }

    if(status !== 'resolve' && status !== 'reject'){
      console.warn('Message type unknown', packet.status);
      return;
    }

    if(this._status !== 'pending'){
      console.warn('TesseractJob ' + this.id + ' is already settled; ignoring packet', status);
      return;
    }

    // Record the outcome first so callbacks registered from inside a
    // callback observe the settled state.
    this._status = status;
    this._data = data;

    if(status === 'resolve'){
      // Nobody is listening: surface the result on the console instead.
      if(this._resolve.length === 0) console.debug(data);
      this._resolve.forEach(fn => {
        const ret = fn(data);
        if(ret && typeof ret.then == 'function'){
          console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
        }
      });
    }else{
      // Nobody is listening: surface the error on the console instead.
      if(this._reject.length === 0) console.error(data);
      this._reject.forEach(fn => fn(data));
    }

    this._instance._dequeue();
  }
}
var DefaultTesseract = create()
module.exports = DefaultTesseract
module.exports = DefaultTesseract

@@ -35,2 +35,3 @@ const path = require('path')

fs.readFile(image, function(err, buffer){
if (err) throw err;
loadImage(buffer, cb)

@@ -37,0 +38,0 @@ })

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Packages

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc