🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team.Learn more
Socket
Book a DemoInstallSign in
Socket

tesseract.js

Package Overview
Dependencies
Maintainers
2
Versions
70
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tesseract.js - npm Package Compare versions

Comparing version

to
1.0.2

demo.gif

737

dist/tesseract.js
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
exports.defaultOptions = {
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
workerPath: 'dist/worker.js',
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
'use strict';
/* eslint-disable no-unused-vars */
var hasOwnProperty = Object.prototype.hasOwnProperty;
var propIsEnumerable = Object.prototype.propertyIsEnumerable;
/**
 * Convert `val` to its object form.
 *
 * @param {*} val - Value to convert.
 * @returns {Object} `Object(val)`; objects are returned unchanged, primitives are boxed.
 * @throws {TypeError} When `val` is `null` or `undefined`.
 */
function toObject(val) {
  var isMissing = val === null || val === undefined;
  if (!isMissing) {
    return Object(val);
  }
  throw new TypeError('Object.assign cannot be called with null or undefined');
}
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var worker = new Worker(workerOptions.workerPath)
worker.onmessage = function(e){
var packet = e.data;
instance._recv(packet)
/**
 * Decide whether the engine's own `Object.assign` should be used.
 *
 * @returns {boolean} `false` when `Object.assign` is missing, when any of the
 *   three property-order probes below sees an unexpected order, or when a
 *   probe throws; `true` otherwise.
 */
function shouldUseNative() {
  try {
    if (!Object.assign) {
      return false;
    }

    // Detect buggy property enumeration order in older V8 versions.

    // Probe 1: https://bugs.chromium.org/p/v8/issues/detail?id=4118
    var boxed = new String('abc'); // eslint-disable-line
    boxed[5] = 'de';
    var firstOwnName = Object.getOwnPropertyNames(boxed)[0];
    if (firstOwnName === '5') {
      return false;
    }

    // Probe 2: https://bugs.chromium.org/p/v8/issues/detail?id=3056
    var byCharCode = {};
    var code = 0;
    while (code < 10) {
      byCharCode['_' + String.fromCharCode(code)] = code;
      code++;
    }
    var valuesInNameOrder = Object.getOwnPropertyNames(byCharCode).map(function (name) {
      return byCharCode[name];
    });
    if (valuesInNameOrder.join('') !== '0123456789') {
      return false;
    }

    // Probe 3: https://bugs.chromium.org/p/v8/issues/detail?id=3056
    var byLetter = {};
    'abcdefghijklmnopqrst'.split('').forEach(function (ch) {
      byLetter[ch] = ch;
    });
    var copiedKeys = Object.keys(Object.assign({}, byLetter));
    return copiedKeys.join('') === 'abcdefghijklmnopqrst';
  } catch (err) {
    // None of the probes is expected to throw; treat a throw as "do not trust".
    return false;
  }
}
module.exports = shouldUseNative() ? Object.assign : function (target, source) {
var from;
var to = toObject(target);
var symbols;
for (var s = 1; s < arguments.length; s++) {
from = Object(arguments[s]);
for (var key in from) {
if (hasOwnProperty.call(from, key)) {
to[key] = from[key];
}
}
if (Object.getOwnPropertySymbols) {
symbols = Object.getOwnPropertySymbols(from);
for (var i = 0; i < symbols.length; i++) {
if (propIsEnumerable.call(from, symbols[i])) {
to[symbols[i]] = from[symbols[i]];
}
}
}
}
return to;
};
},{}],2:[function(require,module,exports){
// shim for using process in browser
var process = module.exports = {};
// cached from whatever global is present so that test runners that stub it
// don't break things. But we need to wrap it in a try catch in case it is
// wrapped in strict mode code which doesn't define any globals. It's inside a
// function because try/catches deoptimize in certain engines.
// Timer functions captured once when this module is evaluated.
var cachedSetTimeout;
var cachedClearTimeout;

/**
 * Stand-in stored in `cachedSetTimeout` when no usable global `setTimeout`
 * is found at load time.
 *
 * @throws {Error} Always.
 */
function defaultSetTimout() {
  throw new Error('setTimeout has not been defined');
}

/**
 * Stand-in stored in `cachedClearTimeout` when no usable global
 * `clearTimeout` is found at load time.
 *
 * @throws {Error} Always.
 */
function defaultClearTimeout() {
  throw new Error('clearTimeout has not been defined');
}

// Populate both caches. Each lookup has its own try/catch so that a failure
// while probing one global cannot prevent the other from being cached.
// (A stray `return worker` statement used to sit between the two probes; it
// referenced an undefined name and made the clearTimeout probe unreachable.)
(function () {
  try {
    cachedSetTimeout = typeof setTimeout === 'function' ? setTimeout : defaultSetTimout;
  } catch (e) {
    cachedSetTimeout = defaultSetTimout;
  }
  try {
    cachedClearTimeout = typeof clearTimeout === 'function' ? clearTimeout : defaultClearTimeout;
  } catch (e) {
    cachedClearTimeout = defaultClearTimeout;
  }
}());
/**
 * Schedule `fun` with a delay of 0 using the best available setTimeout.
 *
 * @param {Function} fun - Callback to schedule.
 * @returns {*} Whatever the underlying setTimeout call returns.
 */
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
// Normal environments: the cached function is still the global setTimeout.
return setTimeout(fun, 0);
}
// setTimeout was not available when the cache was filled but has been defined later:
// adopt it as the cached function from now on.
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// The global setTimeout differs from the cached one (something replaced it); call the cached one directly.
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled, so I.E. doesn't trust the global object when called normally.
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// Same as above, but for a version of I.E. that must have the global object for 'this'; hopefully our context is correct, otherwise it will throw a global error.
return cachedSetTimeout.call(this, fun, 0);
}
}
}
function runClearTimeout(marker) {
if (cachedClearTimeout === clearTimeout) {
//normal enviroments in sane situations
return clearTimeout(marker);
}
// if clearTimeout wasn't available but was latter defined
if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
cachedClearTimeout = clearTimeout;
return clearTimeout(marker);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedClearTimeout(marker);
} catch (e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedClearTimeout.call(null, marker);
} catch (e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error.
// Some versions of I.E. have different rules for clearTimeout vs setTimeout
return cachedClearTimeout.call(this, marker);
}
}
exports.terminateWorker = function(instance){
instance.worker.terminate()
}
var queue = [];
var draining = false;
var currentQueue;
var queueIndex = -1;
exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, function(img){
packet.payload.image = img
instance.worker.postMessage(packet)
})
/**
 * Timeout callback that drainQueue schedules through runTimeout.
 * Does nothing unless a drain is still flagged as in progress and a
 * currentQueue exists. Otherwise it clears the draining flag, puts the
 * contents of currentQueue back in front of queue (or resets queueIndex when
 * currentQueue is empty), and restarts draining if anything is queued.
 * NOTE(review): presumably this recovers from a drain that was interrupted by
 * a throwing task — confirm against the full drainQueue.
 */
function cleanUpNextTick() {
if (!draining || !currentQueue) {
return;
}
draining = false;
if (currentQueue.length) {
// Items of the in-progress batch go ahead of anything queued since.
queue = currentQueue.concat(queue);
} else {
queueIndex = -1;
}
if (queue.length) {
drainQueue();
}
}
function drainQueue() {
if (draining) {
return;
}
var timeout = runTimeout(cleanUpNextTick);
draining = true;
function loadImage(image, cb){
if(typeof image === 'string'){
if(/^\#/.test(image)){
var len = queue.length;
while(len) {
currentQueue = queue;
queue = [];
while (++queueIndex < len) {
if (currentQueue) {
currentQueue[queueIndex].run();
}
}
queueIndex = -1;
len = queue.length;
}
currentQueue = null;
draining = false;
runClearTimeout(timeout);
}
/**
 * Queue `fun` to be run later with any additional arguments passed here.
 * A drain is scheduled via runTimeout only when this call made the queue
 * non-empty and no drain is currently in progress.
 *
 * @param {Function} fun - Callback to queue; remaining arguments are stored
 *   and handed to it when its Item runs.
 */
process.nextTick = function (fun) {
  var argCount = arguments.length;
  var forwarded = new Array(argCount - 1);
  for (var pos = 1; pos < argCount; pos++) {
    forwarded[pos - 1] = arguments[pos];
  }

  queue.push(new Item(fun, forwarded));

  var isFirstPending = queue.length === 1;
  if (isFirstPending && !draining) {
    runTimeout(drainQueue);
  }
};
// v8 likes predictible objects
/**
 * A queued callback together with the arguments it will be called with.
 *
 * @param {Function} fun - Callback to invoke later.
 * @param {Array} array - Arguments to pass to `fun`.
 */
function Item(fun, array) {
  this.fun = fun;
  this.array = array;
}

/** Invoke the stored callback with the stored arguments and a `null` receiver. */
Item.prototype.run = function () {
  var callback = this.fun;
  var callArgs = this.array;
  callback.apply(null, callArgs);
};
// Static fields of the browser `process` shim.
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};
// Every event-emitter method on the shim is the same do-nothing function.
function noop() {}
process.on = noop;
process.addListener = noop;
process.once = noop;
process.off = noop;
process.removeListener = noop;
process.removeAllListeners = noop;
process.emit = noop;
// Always throws: the shim provides no bindings.
process.binding = function (name) {
throw new Error('process.binding is not supported');
};
// The working directory is reported as '/' and cannot be changed.
process.cwd = function () { return '/' };
process.chdir = function (dir) {
throw new Error('process.chdir is not supported');
};
// Always returns 0.
process.umask = function() { return 0; };
},{}],3:[function(require,module,exports){
(function (process){
'use strict';
// Default locations of the worker script, the tesseract.js-core script and
// the language data, exported as `defaultOptions`.
var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'
};
// When NODE_ENV is "development", load the worker from the page's own host
// (dist/worker.dev.js) instead of the CDN copy.
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration');
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js';
}
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions) {
if (window.Blob && window.URL) {
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']);
var worker = new Worker(window.URL.createObjectURL(blob));
} else {
var worker = new Worker(workerOptions.workerPath);
}
worker.onmessage = function (e) {
var packet = e.data;
instance._recv(packet);
};
return worker;
};
exports.terminateWorker = function (instance) {
instance.worker.terminate();
};
exports.sendPacket = function sendPacket(instance, packet) {
loadImage(packet.payload.image, function (img) {
packet.payload.image = img;
instance.worker.postMessage(packet);
});
};
function loadImage(image, cb) {
if (typeof image === 'string') {
if (/^\#/.test(image)) {
// element css selector
return loadImage(document.querySelector(image), cb)
}else{
// url or path
var im = new Image
return loadImage(document.querySelector(image), cb);
} else if (/(blob|data)\:/.test(image)) {
// data url
var im = new Image();
im.src = image;
im.onload = e => loadImage(im, cb);
return
im.onload = function (e) {
return loadImage(im, cb);
};
return;
} else {
var xhr = new XMLHttpRequest();
xhr.open('GET', image, true);
xhr.responseType = "blob";
xhr.onload = function (e) {
return loadImage(xhr.response, cb);
};
xhr.onerror = function (e) {
if (/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)) {
console.debug('Attempting to load image with CORS proxy');
loadImage('https://crossorigin.me/' + image, cb);
}
};
xhr.send(null);
return;
}
}else if(image instanceof File){
} else if (image instanceof File) {
// files
var fr = new FileReader()
fr.onload = e => loadImage(fr.result, cb);
fr.readAsDataURL(image)
return
}else if(image instanceof Blob){
return loadImage(URL.createObjectURL(image), cb)
}else if(image.getContext){
var fr = new FileReader();
fr.onload = function (e) {
return loadImage(fr.result, cb);
};
fr.readAsDataURL(image);
return;
} else if (image instanceof Blob) {
return loadImage(URL.createObjectURL(image), cb);
} else if (image.getContext) {
// canvas element
return loadImage(image.getContext('2d'), cb)
}else if(image.tagName == "IMG" || image.tagName == "VIDEO"){
return loadImage(image.getContext('2d'), cb);
} else if (image.tagName == "IMG" || image.tagName == "VIDEO") {
// image element or video element
var c = document.createElement('canvas');
c.width = image.naturalWidth || image.videoWidth;
c.width = image.naturalWidth || image.videoWidth;
c.height = image.naturalHeight || image.videoHeight;
var ctx = c.getContext('2d');
ctx.drawImage(image, 0, 0);
return loadImage(ctx, cb)
}else if(image.getImageData){
return loadImage(ctx, cb);
} else if (image.getImageData) {
// canvas context
var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
return loadImage(data, cb)
return loadImage(data, cb);
} else {
return cb(image);
}
cb(image)
throw new Error('Missing return in loadImage cascade');
}
},{}],2:[function(require,module,exports){
}).call(this,require('_process'))
},{"_process":2}],4:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree

@@ -76,23 +382,23 @@ // which can be easily serialized (for instance

module.exports = function circularize(page){
page.paragraphs = []
page.lines = []
page.words = []
page.symbols = []
module.exports = function circularize(page) {
page.paragraphs = [];
page.lines = [];
page.words = [];
page.symbols = [];
page.blocks.forEach(function(block){
page.blocks.forEach(function (block) {
block.page = page;
block.lines = []
block.words = []
block.symbols = []
block.lines = [];
block.words = [];
block.symbols = [];
block.paragraphs.forEach(function(para){
block.paragraphs.forEach(function (para) {
para.block = block;
para.page = page;
para.words = []
para.symbols = []
para.lines.forEach(function(line){
para.words = [];
para.symbols = [];
para.lines.forEach(function (line) {
line.paragraph = para;

@@ -102,5 +408,5 @@ line.block = block;

line.symbols = []
line.symbols = [];
line.words.forEach(function(word){
line.words.forEach(function (word) {
word.line = line;

@@ -110,3 +416,3 @@ word.paragraph = para;

word.page = page;
word.symbols.forEach(function(sym){
word.symbols.forEach(function (sym) {
sym.word = word;

@@ -117,167 +423,214 @@ sym.line = line;

sym.page = page;
sym.line.symbols.push(sym)
sym.paragraph.symbols.push(sym)
sym.block.symbols.push(sym)
sym.page.symbols.push(sym)
})
word.paragraph.words.push(word)
word.block.words.push(word)
word.page.words.push(word)
})
line.block.lines.push(line)
line.page.lines.push(line)
})
para.page.paragraphs.push(para)
})
})
return page
}
},{}],3:[function(require,module,exports){
"use strict";
var adapter = require('./node/index.js')
var circularize = require('./common/circularize.js')
sym.line.symbols.push(sym);
sym.paragraph.symbols.push(sym);
sym.block.symbols.push(sym);
sym.page.symbols.push(sym);
});
word.paragraph.words.push(word);
word.block.words.push(word);
word.page.words.push(word);
});
line.block.lines.push(line);
line.page.lines.push(line);
});
para.page.paragraphs.push(para);
});
});
return page;
};
},{}],5:[function(require,module,exports){
'use strict';
function createWorker(workerOptions){
return new TesseractWorker(workerOptions)
/**
 * Helper that installs method descriptors on a constructor.
 *
 * `_createClass(Constructor, protoProps, staticProps)` defines every
 * descriptor in `protoProps` on `Constructor.prototype` and every descriptor
 * in `staticProps` on `Constructor` itself, then returns `Constructor`.
 * Each descriptor is updated in place before use: `enumerable` defaults to
 * false, `configurable` is forced to true, and `writable` is forced to true
 * when the descriptor carries a `value`.
 */
var _createClass = (function () {
  function defineProperties(target, props) {
    var index = 0;
    while (index < props.length) {
      var descriptor = props[index];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) {
        descriptor.writable = true;
      }
      Object.defineProperty(target, descriptor.key, descriptor);
      index += 1;
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      defineProperties(Constructor.prototype, protoProps);
    }
    if (staticProps) {
      defineProperties(Constructor, staticProps);
    }
    return Constructor;
  };
})();
/**
 * Guard used at the top of constructors: throws unless `instance` is an
 * instance of `Constructor`.
 *
 * @throws {TypeError} When `instance instanceof Constructor` is false.
 */
function _classCallCheck(instance, Constructor) {
  var isInstance = instance instanceof Constructor;
  if (isInstance) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var objectAssign = require('object-assign');
/**
 * Build a TesseractWorker whose options are the adapter defaults overlaid
 * with `workerOptions`.
 *
 * @param {Object} [workerOptions] - Overrides for `adapter.defaultOptions`;
 *   any falsy value is treated as an empty object.
 * @returns {TesseractWorker} The new worker, with this function attached as
 *   its `create` property.
 */
function create(workerOptions) {
  var mergedOptions = objectAssign({}, adapter.defaultOptions, workerOptions || {});
  var instance = new TesseractWorker(mergedOptions);
  instance.create = create;
  return instance;
}
class TesseractWorker {
constructor(workerOptions){
var TesseractWorker = function () {
function TesseractWorker(workerOptions) {
_classCallCheck(this, TesseractWorker);
this.worker = null;
this.workerOptions = workerOptions;
this._currentJob = null;
this._queue = []
this._queue = [];
}
recognize(image, options){
return this._delay(job => {
options = options || {}
options.lang = options.lang || 'eng';
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions })
})
}
detect(image, options){
options = options || {}
return this._delay(job => {
job._send('detect', { image: image, options: options, workerOptions: this.workerOptions })
})
}
_createClass(TesseractWorker, [{
key: 'recognize',
value: function recognize(image, options) {
var _this = this;
terminate(){
if(this.worker) adapter.terminateWorker(this);
this.worker = null;
}
return this._delay(function (job) {
if (typeof options === 'string') {
options = { lang: options };
} else {
options = options || {};
options.lang = options.lang || 'eng';
}
_delay(fn){
if(!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);
job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions });
});
}
}, {
key: 'detect',
value: function detect(image, options) {
var _this2 = this;
var job = new TesseractJob(this);
this._queue.push(e => {
this._queue.shift()
this._currentJob = job;
fn(job)
})
if(!this._currentJob) this._dequeue();
return job
}
options = options || {};
return this._delay(function (job) {
job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
});
}
}, {
key: 'terminate',
value: function terminate() {
if (this.worker) adapter.terminateWorker(this);
this.worker = null;
}
}, {
key: '_delay',
value: function _delay(fn) {
var _this3 = this;
_dequeue(){
this._currentJob = null;
if(this._queue.length > 0){
this._queue[0]()
if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);
var job = new TesseractJob(this);
this._queue.push(function (e) {
_this3._queue.shift();
_this3._currentJob = job;
fn(job);
});
if (!this._currentJob) this._dequeue();
return job;
}
}
}, {
key: '_dequeue',
value: function _dequeue() {
this._currentJob = null;
if (this._queue.length > 0) {
this._queue[0]();
}
}
}, {
key: '_recv',
value: function _recv(packet) {
_recv(packet){
if (packet.status === 'resolve' && packet.action === 'recognize') {
packet.data = circularize(packet.data);
}
if(packet.status === 'resolve' && packet.action === 'recognize'){
packet.data = circularize(packet.data);
}
if(this._currentJob.id === packet.jobId){
this._currentJob._handle(packet)
}else{
console.warn('Job ID ' + packet.jobId + ' not known.')
if (this._currentJob.id === packet.jobId) {
this._currentJob._handle(packet);
} else {
console.warn('Job ID ' + packet.jobId + ' not known.');
}
}
}
}
}]);
return TesseractWorker;
}();
var jobCounter = 0;
class TesseractJob {
constructor(instance){
this.id = 'Job-' + (++jobCounter) + '-' + Math.random().toString(16).slice(3, 8)
var TesseractJob = function () {
function TesseractJob(instance) {
_classCallCheck(this, TesseractJob);
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
this._instance = instance;
this._resolve = []
this._reject = []
this._progress = []
this._resolve = [];
this._reject = [];
this._progress = [];
}
then(resolve, reject){
if(this._resolve.push){
this._resolve.push(resolve)
}else{
resolve(this._resolve)
}
_createClass(TesseractJob, [{
key: 'then',
value: function then(resolve, reject) {
if (this._resolve.push) {
this._resolve.push(resolve);
} else {
resolve(this._resolve);
}
if(reject) this.catch(reject);
return this;
}
catch(reject){
if(this._reject.push){
this._reject.push(reject)
}else{
reject(this._reject)
if (reject) this.catch(reject);
return this;
}
return this;
}
progress(fn){
this._progress.push(fn)
return this;
}
_send(action, payload){
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
})
}
_handle(packet){
var data = packet.data;
if(packet.status === 'resolve'){
if(this._resolve.length === 0) console.debug(data);
this._resolve.forEach(fn => {
var ret = fn(data);
if(ret && typeof ret.then == 'function'){
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.')
}
})
this._resolve = data;
this._instance._dequeue()
}else if(packet.status === 'reject'){
if(this._reject.length === 0) console.error(data);
this._reject.forEach(fn => fn(data))
this._reject = data;
this._instance._dequeue()
}else if(packet.status === 'progress'){
this._progress.forEach(fn => fn(data))
}else{
console.warn('Message type unknown', packet.status)
}, {
key: 'catch',
value: function _catch(reject) {
if (this._reject.push) {
this._reject.push(reject);
} else {
reject(this._reject);
}
return this;
}
}
}
}, {
key: 'progress',
value: function progress(fn) {
this._progress.push(fn);
return this;
}
}, {
key: '_send',
value: function _send(action, payload) {
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
});
}
}, {
key: '_handle',
value: function _handle(packet) {
var data = packet.data;
if (packet.status === 'resolve') {
if (this._resolve.length === 0) console.debug(data);
this._resolve.forEach(function (fn) {
var ret = fn(data);
if (ret && typeof ret.then == 'function') {
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
}
});
this._resolve = data;
this._instance._dequeue();
} else if (packet.status === 'reject') {
if (this._reject.length === 0) console.error(data);
this._reject.forEach(function (fn) {
return fn(data);
});
this._reject = data;
this._instance._dequeue();
} else if (packet.status === 'progress') {
this._progress.forEach(function (fn) {
return fn(data);
});
} else {
console.warn('Message type unknown', packet.status);
}
}
}]);
return TesseractJob;
}();
var DefaultTesseract = createWorker(adapter.defaultOptions)
DefaultTesseract.createWorker = createWorker;
var DefaultTesseract = create();
module.exports = DefaultTesseract
module.exports = DefaultTesseract;
},{"./common/circularize.js":2,"./node/index.js":1}]},{},[3])(3)
},{"./common/circularize.js":4,"./node/index.js":3,"object-assign":1}]},{},[5])(5)
});
{
"name": "tesseract.js",
"version": "1.0.1",
"version": "1.0.2",
"description": "Pure Javascript Multilingual OCR",

@@ -8,3 +8,3 @@ "main": "src/index.js",

"test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -o dist/tesseract.js --standalone Tesseract & watchify src/browser/worker.js -o dist/worker.js & http-server -p 7355",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js"

@@ -21,2 +21,3 @@ },

"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",

@@ -29,2 +30,3 @@ "watchify": "^3.7.0"

"level-js": "^2.2.4",
"object-assign": "^4.1.0",
"pako": "^1.0.3",

@@ -31,0 +33,0 @@ "png.js": "^0.2.1",

@@ -1,8 +0,4 @@

> # UNDER CONSTRUCTION
> ## Due for Release on ~~Tuesday, Oct 4, 2016~~ Friday, Oct 7, 2016
> Sorry for the delay!
# [Tesseract.js](http://tesseract.projectnaptha.com/)
Tesseract.js is a javascript library that gets words in [almost any language](./tesseract_lang_list.md) out of images.
Tesseract.js is a javascript library that gets words in [almost any language](./tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))

@@ -12,7 +8,7 @@ <!-- Under the hood, Tesseract.js wraps [tesseract.js-core](https://github.com/naptha/tesseract.js-core), an [emscripten](https://github.com/kripken/emscripten) port of the [Tesseract OCR Engine](https://github.com/tesseract-ocr/tesseract).

![fancy demo gif](http://placehold.it/700x300 "jhgfjhgf")
[![fancy demo gif](./demo.gif "Demo")](http://tesseract.projectnaptha.com)
Tesseract.js works with script tags, webpack/browserify, and node. Once you're [set up](#installation), using it is as simple as
Tesseract.js works with script tags, webpack/browserify, and node. [After you install it](#installation), using it is as simple as
```javascript
Tesseract.recognize(my_image)
Tesseract.recognize(myImage)
.progress(function (p) { console.log('progress', p) })

@@ -28,18 +24,9 @@ .then(function (result) { console.log('result', result) })

## &lt;script/>
## &lt;script />
You can either include Tesseract.js on your page with a cdn like this:
You can simply include Tesseract.js with a cdn like this:
```html
<script src='https://cdn.rawgit.com/naptha/tesseract.js/a01d2a2/dist/tesseract.js'></script>
<script src='https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/tesseract.js'></script>
```
Or you can grab copies of `tesseract.js` and `tesseract.worker.js` from the [dist folder](https://github.com/naptha/tesseract.js/tree/master/dist) and include your local copies like this:
```html
<script src='/path/to/tesseract.js'></script>
<script>
Tesseract.workerUrl = 'http://www.absolute-path-to/tesseract.worker.js'
</script>
```
After including your scripts, the `Tesseract` variable should be defined! You can [head to the docs](#docs) for a full treatment of the API.

@@ -75,7 +62,7 @@

+ [TesseractJob.then(callback: function) -> TesseractJob](#tesseractjobthencallback-function---tesseractjob)
+ [TesseractJob.error(callback: function) -> TesseractJob](#tesseractjoberrorcallback-function---tesseractjob)
* [Tesseract Remote File Options](#tesseract-remote-file-options)
+ [Tesseract.coreUrl](#tesseractcoreurl)
+ [Tesseract.workerUrl](#tesseractworkerurl)
+ [Tesseract.langUrl](#tesseractlangurl)
+ [TesseractJob.catch(callback: function) -> TesseractJob](#tesseractjobcatchcallback-function---tesseractjob)
* [Tesseract Configuration](#tesseract-configuration)
+ [corePath](#corepath)
+ [workerPath](#workerpath)
+ [langPath](#langpath)
* [Contributing](#contributing)

@@ -90,11 +77,11 @@ + [Development](#development)

- `image` is any [ImageLike](#imagelike) object.
- `options` is an optional flat json object. `options` may:
- `options` is either absent (in which case it is interpreted as `'eng'`), a string specifying a language short code from the [language list](./tesseract_lang_list.md), or a flat json object that may:
+ include properties that override some subset of the [default tesseract parameters](./tesseract_parameters.md)
+ include a `lang` property with a value from the [list of lang parameters](./tesseract_lang_list.md)
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, and `error` methods can be used to act on the result.
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, and `catch` methods can be used to act on the result.
### Simple Example:
```javascript
Tesseract.recognize('#my-image')
Tesseract.recognize(myImage)
.then(function(result){

@@ -108,3 +95,3 @@ console.log(result)

// if we know our image is of spanish words without the letter 'e':
Tesseract.recognize('#my-image', {
Tesseract.recognize(myImage, {
lang: 'spa',

@@ -122,3 +109,5 @@ tessedit_char_blacklist: 'e'

## Tesseract.detect(image: [ImageLike](#imagelike)) -> [TesseractJob](#tesseractjob)
Figures out what script (e.g. 'Latin', 'Chinese') the words in image are written in.
- `image` is any [ImageLike](#imagelike) object.

@@ -130,3 +119,3 @@

```javascript
Tesseract.detect('#my-image')
Tesseract.detect(myImage)
.then(function(result){

@@ -139,20 +128,32 @@ console.log(result)

## ImageLike
The main Tesseract.js functions take an `image` parameter, which should be something that is 'image-like'.
That means `image` should be
- an `img` element or querySelector that matches an `img` element
- a `video` element or querySelector that matches a `video` element
- a `canvas` element or querySelector that matches a `canvas` element
The main Tesseract.js functions take an `image` parameter, which should be something that is like an image. What's considered "image-like" differs depending on whether it is being run from the browser or through NodeJS.
On a browser, an image can be:
- an `img`, `video`, or `canvas` element
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
- the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
- a `File` object (from a file `<input>` or drag-drop event)
- a `Blob` object
- a `ImageData` instance (an object containing `width`, `height` and `data` properties)
- a path or URL to an accessible image (the image must either be hosted locally or accessible by CORS)
In NodeJS, an image can be
- a path to a local image
- a `Buffer` instance containing a `PNG` or `JPEG` image
- a `ImageData` instance (an object containing `width`, `height` and `data` properties)
## TesseractJob
A TesseractJob is an object returned by a call to recognize or detect.
All methods of a given TesseractJob return that TesseractJob to enable chaining.
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. One important difference is that these methods return the job itself (to enable chaining) rather than a new object.
Typical use is:
```javascript
Tesseract.recognize('#my-image')
Tesseract.recognize(myImage)
.progress(function(message){console.log(message)})
.error(function(err){console.error(err)})
.catch(function(err){console.error(err)})
.then(function(result){console.log(result)})

@@ -163,7 +164,7 @@ ```

```javascript
var job1 = Tesseract.recognize('#my-image');
var job1 = Tesseract.recognize(myImage);
job1.progress(function(message){console.log(message)});
job1.error(function(err){console.error(err)});
job1.catch(function(err){console.error(err)});

@@ -181,4 +182,4 @@ job1.then(function(result){console.log(result)})

```javascript
Tesseract.recognize('#my-image')
.progress(function(message){console.log('progress is: ', message)})
Tesseract.recognize(myImage)
.progress(function(message){console.log('progress is: ', message)})
```

@@ -207,3 +208,3 @@

```javascript
Tesseract.recognize('#my-image')
Tesseract.recognize(myImage)
.then(function(result){console.log('result is: ', result)})

@@ -229,39 +230,26 @@ ```

### TesseractJob.error(callback: function) -> TesseractJob
### TesseractJob.catch(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if the job fails.
- `callback` is a function with the signature `callback(error)` where `error` is a json object.
## Tesseract Remote File Options
### Tesseract.coreUrl
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.rawgit.com/naptha/tesseract.js-core/master/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
## Tesseract Configuration
For example:
```javascript
Tesseract.coreUrl = 'https://absolute-path-to/tesseract.js-core/index.js'
window.Tesseract = Tesseract.create({
workerPath: '/path/to/worker.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
})
```
### Tesseract.workerUrl
### corePath
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
### workerPath
A string specifying the location of the [tesseract.worker.js](./dist/tesseract.worker.js) file, with default value 'https://cdn.rawgit.com/naptha/tesseract.js/8b915dc/dist/tesseract.worker.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
For example:
```javascript
Tesseract.workerUrl = 'https://absolute-path-to/tesseract.worker.js'
```
### langPath
A string specifying the location of the tesseract language files, with default value 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'. Language file urls are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files.
### Tesseract.langUrl
A string specifying the location of the tesseract language files, with default value 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'. Language file urls are calculated according to the formula `Tesseract.langUrl + lang + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files.
In the following example, Tesseract.js will download the language file from 'https://absolute-path-to/lang/folder/rus.traineddata.gz':
```javascript
Tesseract.langUrl = 'https://absolute-path-to/lang/folder/'
Tesseract.recognize('#my-im', {
lang: 'rus'
})
```
## Contributing

@@ -281,9 +269,10 @@ ### Development

tesseract.js@1.0.0 start /Users/guillermo/Desktop/code_static/tesseract.js
node devServer.js
Starting up http-server, serving ./
Available on:
http://127.0.0.1:7355
http://[your ip]:7355
Listening at http://localhost:7355
```
Then open `http://localhost:7355` in your favorite browser. The devServer automatically rebuilds tesseract.js and tesseract.worker.js when you change files in the src folder.
Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds tesseract.js and tesseract.worker.js when you change files in the src folder.

@@ -297,2 +286,2 @@ ### Building Static Files

### Send us a Pull Request!
Thanks :)
Thanks :)

@@ -1,9 +0,23 @@

exports.defaultOptions = {
var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
workerPath: 'dist/worker.js',
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
}
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration')
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js'
}
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var worker = new Worker(workerOptions.workerPath)
if(window.Blob && window.URL){
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'])
var worker = new Worker(window.URL.createObjectURL(blob));
}else{
var worker = new Worker(workerOptions.workerPath)
}
worker.onmessage = function(e){

@@ -33,4 +47,4 @@ var packet = e.data;

return loadImage(document.querySelector(image), cb)
}else{
// url or path
}else if(/(blob|data)\:/.test(image)){
// data url
var im = new Image

@@ -40,2 +54,15 @@ im.src = image;

return
}else{
var xhr = new XMLHttpRequest();
xhr.open('GET', image, true)
xhr.responseType = "blob";
xhr.onload = e => loadImage(xhr.response, cb);
xhr.onerror = function(e){
if(/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)){
console.debug('Attempting to load image with CORS proxy')
loadImage('https://crossorigin.me/' + image, cb)
}
}
xhr.send(null)
return
}

@@ -65,4 +92,7 @@ }else if(image instanceof File){

return loadImage(data, cb)
}else{
return cb(image)
}
cb(image)
throw new Error('Missing return in loadImage cascade')
}

@@ -1,2 +0,2 @@

var leveljs = require('level-js')
const leveljs = require('level-js')
var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2')

@@ -25,3 +25,3 @@

var ungzip = require('pako').ungzip;
const ungzip = require('pako').ungzip;

@@ -34,4 +34,4 @@ function fetchLanguageData(req, res, cb){

var xhr = new XMLHttpRequest();
xhr.open('GET', url, true);
xhr.responseType = 'arraybuffer';
xhr.open('GET', url, true);
xhr.onerror = e => {

@@ -38,0 +38,0 @@ xhr.onprogress = xhr.onload = null

@@ -1,4 +0,6 @@

"use strict";
const workerUtils = require('../common/worker.js')
var workerUtils = require('../common/worker.js')
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Worker')
}

@@ -12,5 +14,5 @@ global.addEventListener('message', function(e){

if(!global.TesseractCore){
res.progress({ status: 'loading tesseract core' })
importScripts(req.workerOptions.tesseractPath)
res.progress({ status: 'loaded tesseract core' })
res.progress({ status: 'loading tesseract core', progress: 0 })
importScripts(req.workerOptions.corePath)
res.progress({ status: 'loading tesseract core', progress: 1 })
}

@@ -17,0 +19,0 @@ return TesseractCore

@@ -38,3 +38,3 @@ var latestJob;

res.progress({ status: 'initializing tesseract api' })
res.progress({ status: 'initializing tesseract', progress: 0 })
Module = Core({

@@ -49,3 +49,3 @@ TOTAL_MEMORY: req.memory,

base = new Module.TessBaseAPI()
res.progress({ status: 'initialized tesseract api' })
res.progress({ status: 'initializing tesseract', progress: 1 })
}

@@ -79,3 +79,3 @@ }

Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
res.progress({ status: 'loaded ' + lang + '.traineddata' })
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 })
Module._loadedLanguages[lang] = true;

@@ -94,5 +94,7 @@ cb()

res.progress({ status: 'initializing api', progress: 0 })
base.Init(null, lang)
res.progress({ status: 'initialized with language' })
res.progress({ status: 'initializing api', progress: 0.3 })
var options = req.options;
for (var option in options) {

@@ -104,3 +106,6 @@ if (options.hasOwnProperty(option)) {

res.progress({ status: 'initializing api', progress: 0.6 })
var ptr = setImage(Module, base, req.image);
res.progress({ status: 'initializing api', progress: 1 })
base.Recognize(null)

@@ -107,0 +112,0 @@

@@ -1,9 +0,10 @@

"use strict";
const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js')
const objectAssign = require('object-assign');
var adapter = require('./node/index.js')
var circularize = require('./common/circularize.js')
function createWorker(workerOptions){
return new TesseractWorker(workerOptions)
function create(workerOptions){
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
worker.create = create;
return worker;
}

@@ -21,4 +22,9 @@

return this._delay(job => {
options = options || {}
options.lang = options.lang || 'eng';
if(typeof options === 'string'){
options = { lang: options };
}else{
options = options || {}
options.lang = options.lang || 'eng';
}
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions })

@@ -141,6 +147,5 @@ })

var DefaultTesseract = createWorker(adapter.defaultOptions)
DefaultTesseract.createWorker = createWorker;
var DefaultTesseract = create()
module.exports = DefaultTesseract

@@ -1,2 +0,2 @@

var path = require('path')
const path = require('path')

@@ -8,4 +8,4 @@ exports.defaultOptions = {

var fork = require('child_process').fork;
var fs = require('fs')
const fork = require('child_process').fork;
const fs = require('fs')

@@ -33,2 +33,3 @@ exports.spawnWorker = function spawnWorker(instance, workerOptions){

function loadImage(image, cb){
// TODO: support URLs
if(typeof image === 'string'){

@@ -43,3 +44,2 @@ fs.readFile(image, function(err, buffer){

if(mime === 'image/png'){

@@ -46,0 +46,0 @@ var PNGReader = require('png.js');

@@ -1,5 +0,5 @@

var http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");
const http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");

@@ -6,0 +6,0 @@ var langdata = require('../common/langdata.json')

@@ -1,5 +0,3 @@

"use strict";
const workerUtils = require('../common/worker.js')
var workerUtils = require('../common/worker.js')
process.on('message', function(packet){

@@ -6,0 +4,0 @@ workerUtils.dispatchHandlers(packet, obj => process.send(obj))

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet