Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

tesseract.js

Package Overview
Dependencies
Maintainers
2
Versions
68
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tesseract.js - npm Package Compare versions

Comparing version 1.0.1 to 1.0.2

demo.gif

737

dist/tesseract.js
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
exports.defaultOptions = {
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
workerPath: 'dist/worker.js',
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
'use strict';
/* eslint-disable no-unused-vars */
var hasOwnProperty = Object.prototype.hasOwnProperty;
var propIsEnumerable = Object.prototype.propertyIsEnumerable;
/**
 * Coerce `val` to an object for use as an assignment target.
 *
 * @param {*} val - Candidate target value.
 * @returns {Object} `val` itself when it is already an object, otherwise its
 *   object wrapper.
 * @throws {TypeError} When `val` is null or undefined.
 */
function toObject(val) {
	var isNullish = val === null || val === undefined;
	if (!isNullish) {
		return Object(val);
	}
	throw new TypeError('Object.assign cannot be called with null or undefined');
}
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var worker = new Worker(workerOptions.workerPath)
worker.onmessage = function(e){
var packet = e.data;
instance._recv(packet)
/**
 * Decide whether the engine's own Object.assign can be trusted.
 *
 * Returns false when Object.assign is missing, when any of the property
 * enumeration-order probes below fails, or when a probe throws.
 *
 * @returns {boolean} true when the native implementation should be used.
 */
function shouldUseNative() {
	try {
		if (!Object.assign) {
			return false;
		}

		// Detect buggy property enumeration order in older V8 versions.

		// https://bugs.chromium.org/p/v8/issues/detail?id=4118
		var probe = new String('abc');  // eslint-disable-line
		probe[5] = 'de';
		if (Object.getOwnPropertyNames(probe)[0] === '5') {
			return false;
		}

		// https://bugs.chromium.org/p/v8/issues/detail?id=3056
		var numbered = {};
		for (var code = 0; code < 10; code++) {
			numbered['_' + String.fromCharCode(code)] = code;
		}
		var numberedOrder = Object.getOwnPropertyNames(numbered)
			.map(function (name) {
				return numbered[name];
			})
			.join('');
		if (numberedOrder !== '0123456789') {
			return false;
		}

		// https://bugs.chromium.org/p/v8/issues/detail?id=3056
		var lettered = {};
		'abcdefghijklmnopqrst'.split('').forEach(function (ch) {
			lettered[ch] = ch;
		});
		var letteredOrder = Object.keys(Object.assign({}, lettered)).join('');
		if (letteredOrder !== 'abcdefghijklmnopqrst') {
			return false;
		}

		return true;
	} catch (err) {
		// None of the probes is expected to throw, but treat a throw as "unsafe".
		return false;
	}
}
module.exports = shouldUseNative() ? Object.assign : function (target, source) {
var from;
var to = toObject(target);
var symbols;
for (var s = 1; s < arguments.length; s++) {
from = Object(arguments[s]);
for (var key in from) {
if (hasOwnProperty.call(from, key)) {
to[key] = from[key];
}
}
if (Object.getOwnPropertySymbols) {
symbols = Object.getOwnPropertySymbols(from);
for (var i = 0; i < symbols.length; i++) {
if (propIsEnumerable.call(from, symbols[i])) {
to[symbols[i]] = from[symbols[i]];
}
}
}
}
return to;
};
},{}],2:[function(require,module,exports){
// shim for using process in browser
var process = module.exports = {};
// cached from whatever global is present so that test runners that stub it
// don't break things. But we need to wrap it in a try catch in case it is
// wrapped in strict mode code which doesn't define any globals. It's inside a
// function because try/catches deoptimize in certain engines.
var cachedSetTimeout;
var cachedClearTimeout;
// Placeholder used when no setTimeout function could be cached; calling it
// always throws.
function defaultSetTimout() {
    var message = 'setTimeout has not been defined';
    throw new Error(message);
}
// Placeholder used when no clearTimeout function could be cached; calling it
// always throws.
function defaultClearTimeout () {
    var message = 'clearTimeout has not been defined';
    throw new Error(message);
}
// Cache the global timer functions once, at load time. Each lookup has its own
// try/catch so that a failure while resolving setTimeout cannot prevent
// clearTimeout from being cached (and vice versa). When a timer is missing or
// is not a function, the matching default stub (which throws when called) is
// cached instead.
//
// A stray `return worker` statement that sat between the two lookups has been
// removed: `worker` is not defined in this scope, so it made the function throw
// before clearTimeout could be cached.
(function () {
    try {
        if (typeof setTimeout === 'function') {
            cachedSetTimeout = setTimeout;
        } else {
            cachedSetTimeout = defaultSetTimout;
        }
    } catch (e) {
        cachedSetTimeout = defaultSetTimout;
    }
    try {
        if (typeof clearTimeout === 'function') {
            cachedClearTimeout = clearTimeout;
        } else {
            cachedClearTimeout = defaultClearTimeout;
        }
    } catch (e) {
        cachedClearTimeout = defaultClearTimeout;
    }
}());
// Schedule `fun` on a zero-delay timer and return whatever the timer call
// returns. The cached setTimeout is preferred; the branches below fall back
// step by step when the cached function is a stub or fails when invoked.
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
// normal environments in sane situations: the cached function is the global one
return setTimeout(fun, 0);
}
// if setTimeout wasn't available at load time but was defined later,
// re-cache the global and use it
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// when somebody has replaced setTimeout but there is no I.E. madness: plain call
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopefully our context is correct, otherwise it will throw a global error
return cachedSetTimeout.call(this, fun, 0);
}
}
}
/**
 * Cancel a timer previously created through the cached setTimeout.
 *
 * The cached clearTimeout is preferred; the later branches fall back step by
 * step when the cached function is a stub or fails when invoked.
 *
 * A stray `exports.terminateWorker = ...` assignment that had ended up inside
 * this function body has been removed; it belongs to a different module and
 * left the function without its own closing brace.
 *
 * @param {*} marker - Timer handle returned by the timer call.
 * @returns {*} Whatever the underlying clearTimeout call returns.
 */
function runClearTimeout(marker) {
    if (cachedClearTimeout === clearTimeout) {
        // normal environments in sane situations
        return clearTimeout(marker);
    }
    // if clearTimeout wasn't available at load time but was defined later,
    // re-cache the global and use it
    if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
        cachedClearTimeout = clearTimeout;
        return clearTimeout(marker);
    }
    try {
        // somebody has replaced clearTimeout but there is no I.E. madness
        return cachedClearTimeout(marker);
    } catch (e) {
        try {
            // When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
            return cachedClearTimeout.call(null, marker);
        } catch (e) {
            // same as above but when it's a version of I.E. that must have the global object for 'this', hopefully our context is correct, otherwise it will throw a global error.
            // Some versions of I.E. have different rules for clearTimeout vs setTimeout
            return cachedClearTimeout.call(this, marker);
        }
    }
}
var queue = [];
var draining = false;
var currentQueue;
var queueIndex = -1;
exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, function(img){
packet.payload.image = img
instance.worker.postMessage(packet)
})
// Recover after a drain that did not finish (still flagged as draining with a
// current queue set): put any tasks left in the interrupted queue back in front
// of the pending queue, or reset the index when none are left, then restart
// draining if there is anything to run.
function cleanUpNextTick() {
    var wasInterrupted = draining && currentQueue;
    if (!wasInterrupted) {
        return;
    }
    draining = false;

    if (!currentQueue.length) {
        queueIndex = -1;
    } else {
        queue = currentQueue.concat(queue);
    }

    if (!queue.length) {
        return;
    }
    drainQueue();
}
/**
 * Run every queued task, including tasks queued while draining.
 *
 * cleanUpNextTick is scheduled through runTimeout before any task runs and is
 * cancelled again once the queue has been emptied.
 *
 * Three stray lines opening an unrelated `loadImage` function had ended up in
 * the middle of this body and left it with unbalanced braces; they have been
 * removed.
 */
function drainQueue() {
    if (draining) {
        return;
    }
    var timeout = runTimeout(cleanUpNextTick);
    draining = true;

    var len = queue.length;
    while (len) {
        // Swap in a fresh queue so tasks added while running are picked up by
        // the next pass of the outer loop.
        currentQueue = queue;
        queue = [];
        while (++queueIndex < len) {
            if (currentQueue) {
                currentQueue[queueIndex].run();
            }
        }
        queueIndex = -1;
        len = queue.length;
    }
    currentQueue = null;
    draining = false;
    runClearTimeout(timeout);
}
// Queue `fun` to be called later with any extra arguments that were passed,
// and schedule a drain when this is the only queued task and no drain is
// already running.
process.nextTick = function (fun) {
    var args = new Array(arguments.length - 1);
    for (var idx = 0; idx < args.length; idx++) {
        args[idx] = arguments[idx + 1];
    }

    queue.push(new Item(fun, args));

    var isFirstPending = queue.length === 1;
    if (isFirstPending && !draining) {
        runTimeout(drainQueue);
    }
};
// v8 likes predictible objects
// A queued task: a function together with the argument list to call it with.
// Both fields are always assigned in the constructor, in the same order.
function Item(fun, array) {
    this.fun = fun;
    this.array = array;
}

// Invoke the stored function with the stored arguments and a null `this`.
Item.prototype.run = function () {
    var callback = this.fun;
    var callArgs = this.array;
    callback.apply(null, callArgs);
};
// Static properties of the browser `process` stand-in.
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};

// Event-emitter style methods are accepted and ignored.
function noop() {}

[
    'on',
    'addListener',
    'once',
    'off',
    'removeListener',
    'removeAllListeners',
    'emit'
].forEach(function (method) {
    process[method] = noop;
});

// Operations with no browser equivalent either throw or return a fixed value.
process.binding = function (name) {
    throw new Error('process.binding is not supported');
};

process.cwd = function () {
    return '/';
};

process.chdir = function (dir) {
    throw new Error('process.chdir is not supported');
};

process.umask = function () {
    return 0;
};
},{}],3:[function(require,module,exports){
(function (process){
'use strict';
// Default locations of the worker script, the core library and the language
// data.
var defaultOptions = {};
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js';
defaultOptions.corePath = 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js';
defaultOptions.langPath = 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/';

// In development, load the worker from the host that is serving the page.
var isDevelopment = process.env.NODE_ENV === "development";
if (isDevelopment) {
    console.debug('Using Development Configuration');
    defaultOptions.workerPath = [location.protocol, '//', location.host, '/dist/worker.dev.js'].join('');
}

exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions) {
if (window.Blob && window.URL) {
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']);
var worker = new Worker(window.URL.createObjectURL(blob));
} else {
var worker = new Worker(workerOptions.workerPath);
}
worker.onmessage = function (e) {
var packet = e.data;
instance._recv(packet);
};
return worker;
};
exports.terminateWorker = function (instance) {
instance.worker.terminate();
};
exports.sendPacket = function sendPacket(instance, packet) {
loadImage(packet.payload.image, function (img) {
packet.payload.image = img;
instance.worker.postMessage(packet);
});
};
/**
 * Reduce any supported image source, step by step, to a value that can be
 * handed to `cb`.
 *
 * Each recognised source is converted one step and fed back into loadImage:
 *   - string starting with '#'          -> element found by document.querySelector
 *   - string containing blob: or data:  -> Image element, once it has loaded
 *   - any other string                  -> fetched as a Blob via XMLHttpRequest
 *   - File                              -> data URL via FileReader
 *   - Blob                              -> object URL
 *   - object with getContext            -> its '2d' context
 *   - IMG / VIDEO element               -> drawn onto a new canvas, then its context
 *   - object with getImageData          -> the pixels of its whole canvas
 * Anything else is passed to `cb` unchanged.
 *
 * This body previously had two versions of the function interleaved (duplicate
 * `else if` arms, arrow-function leftovers and a stray trailing `cb(image)`),
 * which made it unparseable. Only one coherent branch chain is kept, and the
 * unreachable trailing `throw` is dropped because every branch returns.
 *
 * @param {*} image - Image source in one of the forms listed above.
 * @param {function} cb - Called with the fully reduced image value.
 * @returns {*} The return value of `cb` when the chain completes
 *   synchronously, otherwise undefined.
 */
function loadImage(image, cb) {
    if (typeof image === 'string') {
        if (/^\#/.test(image)) {
            // element css selector
            return loadImage(document.querySelector(image), cb);
        }

        if (/(blob|data)\:/.test(image)) {
            // data url
            var im = new Image();
            im.onload = function (e) {
                return loadImage(im, cb);
            };
            im.src = image;
            return;
        }

        // url or path: download the bytes as a Blob
        var xhr = new XMLHttpRequest();
        xhr.open('GET', image, true);
        xhr.responseType = "blob";
        xhr.onload = function (e) {
            return loadImage(xhr.response, cb);
        };
        xhr.onerror = function (e) {
            // Retry absolute http(s) URLs once through the CORS proxy; the
            // second test stops the proxied URL from being proxied again.
            if (/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)) {
                console.debug('Attempting to load image with CORS proxy');
                loadImage('https://crossorigin.me/' + image, cb);
            }
        };
        xhr.send(null);
        return;
    }

    if (image instanceof File) {
        // files
        var fr = new FileReader();
        fr.onload = function (e) {
            return loadImage(fr.result, cb);
        };
        fr.readAsDataURL(image);
        return;
    }

    if (image instanceof Blob) {
        return loadImage(URL.createObjectURL(image), cb);
    }

    if (image.getContext) {
        // canvas element
        return loadImage(image.getContext('2d'), cb);
    }

    if (image.tagName == "IMG" || image.tagName == "VIDEO") {
        // image element or video element
        var c = document.createElement('canvas');
        c.width = image.naturalWidth || image.videoWidth;
        c.height = image.naturalHeight || image.videoHeight;
        var ctx = c.getContext('2d');
        ctx.drawImage(image, 0, 0);
        return loadImage(ctx, cb);
    }

    if (image.getImageData) {
        // canvas context
        var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
        return loadImage(data, cb);
    }

    return cb(image);
}
},{}],2:[function(require,module,exports){
}).call(this,require('_process'))
},{"_process":2}],4:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree

@@ -76,23 +382,23 @@ // which can be easily serialized (for instance

module.exports = function circularize(page){
page.paragraphs = []
page.lines = []
page.words = []
page.symbols = []
module.exports = function circularize(page) {
page.paragraphs = [];
page.lines = [];
page.words = [];
page.symbols = [];
page.blocks.forEach(function(block){
page.blocks.forEach(function (block) {
block.page = page;
block.lines = []
block.words = []
block.symbols = []
block.lines = [];
block.words = [];
block.symbols = [];
block.paragraphs.forEach(function(para){
block.paragraphs.forEach(function (para) {
para.block = block;
para.page = page;
para.words = []
para.symbols = []
para.lines.forEach(function(line){
para.words = [];
para.symbols = [];
para.lines.forEach(function (line) {
line.paragraph = para;

@@ -102,5 +408,5 @@ line.block = block;

line.symbols = []
line.symbols = [];
line.words.forEach(function(word){
line.words.forEach(function (word) {
word.line = line;

@@ -110,3 +416,3 @@ word.paragraph = para;

word.page = page;
word.symbols.forEach(function(sym){
word.symbols.forEach(function (sym) {
sym.word = word;

@@ -117,167 +423,214 @@ sym.line = line;

sym.page = page;
sym.line.symbols.push(sym)
sym.paragraph.symbols.push(sym)
sym.block.symbols.push(sym)
sym.page.symbols.push(sym)
})
word.paragraph.words.push(word)
word.block.words.push(word)
word.page.words.push(word)
})
line.block.lines.push(line)
line.page.lines.push(line)
})
para.page.paragraphs.push(para)
})
})
return page
}
},{}],3:[function(require,module,exports){
"use strict";
var adapter = require('./node/index.js')
var circularize = require('./common/circularize.js')
sym.line.symbols.push(sym);
sym.paragraph.symbols.push(sym);
sym.block.symbols.push(sym);
sym.page.symbols.push(sym);
});
word.paragraph.words.push(word);
word.block.words.push(word);
word.page.words.push(word);
});
line.block.lines.push(line);
line.page.lines.push(line);
});
para.page.paragraphs.push(para);
});
});
return page;
};
},{}],5:[function(require,module,exports){
'use strict';
function createWorker(workerOptions){
return new TesseractWorker(workerOptions)
// Helper that installs prototype and static members on a constructor.
// Every descriptor is made configurable, is non-enumerable unless it says
// otherwise, and is made writable when it carries a `value`.
var _createClass = function () {
    function installMembers(target, members) {
        for (var index = 0; index < members.length; index++) {
            var member = members[index];
            member.enumerable = member.enumerable || false;
            member.configurable = true;
            if ("value" in member) {
                member.writable = true;
            }
            Object.defineProperty(target, member.key, member);
        }
    }

    return function (Constructor, protoProps, staticProps) {
        if (protoProps) {
            installMembers(Constructor.prototype, protoProps);
        }
        if (staticProps) {
            installMembers(Constructor, staticProps);
        }
        return Constructor;
    };
}();
// Guard used inside constructors: throws unless `instance` was created from
// `Constructor`, i.e. when the constructor was called without `new`.
function _classCallCheck(instance, Constructor) {
    var isInstance = instance instanceof Constructor;
    if (isInstance) {
        return;
    }
    throw new TypeError("Cannot call a class as a function");
}
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var objectAssign = require('object-assign');
/**
 * Build a TesseractWorker whose options are the adapter defaults overridden
 * by `workerOptions`, and attach this factory to it as `create`.
 *
 * @param {Object} [workerOptions] - Overrides for the default options.
 * @returns {TesseractWorker} The new worker instance.
 */
function create(workerOptions) {
    var overrides = workerOptions || {};
    var mergedOptions = objectAssign({}, adapter.defaultOptions, overrides);
    var worker = new TesseractWorker(mergedOptions);
    worker.create = create;
    return worker;
}
/**
 * Front end for one background worker. Jobs are run one at a time: each call
 * to recognize/detect is queued and started when no other job is current.
 *
 * This definition previously had lines of an ES6 `class` version interleaved
 * with this transpiled version, which made it unparseable; only the transpiled
 * form is kept. In addition, `_recv` no longer throws when a packet arrives
 * while no job is current; such a packet is reported as unknown instead.
 */
var TesseractWorker = function () {
    /**
     * @param {Object} workerOptions - Options handed to the adapter when the
     *   worker is spawned and sent along with every job.
     */
    function TesseractWorker(workerOptions) {
        _classCallCheck(this, TesseractWorker);

        this.worker = null;
        this.workerOptions = workerOptions;
        this._currentJob = null;
        this._queue = [];
    }

    _createClass(TesseractWorker, [{
        key: 'recognize',

        /**
         * Queue a 'recognize' job.
         * @param {*} image - Image source forwarded to the adapter.
         * @param {string|Object} [options] - A language code, or an options
         *   object whose `lang` defaults to 'eng'.
         * @returns {TesseractJob} The queued job.
         */
        value: function recognize(image, options) {
            var _this = this;

            return this._delay(function (job) {
                if (typeof options === 'string') {
                    options = { lang: options };
                } else {
                    options = options || {};
                    options.lang = options.lang || 'eng';
                }

                job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions });
            });
        }
    }, {
        key: 'detect',

        /**
         * Queue a 'detect' job.
         * @param {*} image - Image source forwarded to the adapter.
         * @param {Object} [options] - Options sent with the job.
         * @returns {TesseractJob} The queued job.
         */
        value: function detect(image, options) {
            var _this2 = this;

            options = options || {};
            return this._delay(function (job) {
                job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
            });
        }
    }, {
        key: 'terminate',

        /** Stop the underlying worker, if one has been spawned. */
        value: function terminate() {
            if (this.worker) adapter.terminateWorker(this);
            this.worker = null;
        }
    }, {
        key: '_delay',

        /**
         * Create a job, spawn the worker on first use, and queue `fn` to be
         * called with the job when its turn comes.
         * @param {function} fn - Called with the job when it starts.
         * @returns {TesseractJob} The new job.
         */
        value: function _delay(fn) {
            var _this3 = this;

            if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);

            var job = new TesseractJob(this);
            this._queue.push(function (e) {
                _this3._queue.shift();
                _this3._currentJob = job;
                fn(job);
            });
            // Start immediately when nothing else is running.
            if (!this._currentJob) this._dequeue();
            return job;
        }
    }, {
        key: '_dequeue',

        /** Clear the current job and start the next queued one, if any. */
        value: function _dequeue() {
            this._currentJob = null;
            if (this._queue.length > 0) {
                this._queue[0]();
            }
        }
    }, {
        key: '_recv',

        /**
         * Handle a packet coming back from the worker: results of a resolved
         * 'recognize' job are passed through `circularize`, then the packet is
         * handed to the current job when the job ids match.
         * @param {Object} packet - Message with jobId, status, action and data.
         */
        value: function _recv(packet) {
            if (packet.status === 'resolve' && packet.action === 'recognize') {
                packet.data = circularize(packet.data);
            }

            if (this._currentJob && this._currentJob.id === packet.jobId) {
                this._currentJob._handle(packet);
            } else {
                console.warn('Job ID ' + packet.jobId + ' not known.');
            }
        }
    }]);

    return TesseractWorker;
}();
var jobCounter = 0;
/**
 * Handle for one queued recognize/detect request. It offers `then`, `catch`
 * and `progress`, each of which returns the job itself rather than a new
 * object.
 *
 * While the job is pending, `_resolve` and `_reject` are arrays of callbacks;
 * once the job settles, the matching field is replaced by the result or error
 * data, which is why `then`/`catch` test for a `push` method.
 *
 * This definition previously had lines of an ES6 `class` version interleaved
 * with this transpiled version, which made it unparseable; only the transpiled
 * form is kept.
 */
var TesseractJob = function () {
    /**
     * @param {Object} instance - Owner of the job; its `_dequeue()` is called
     *   when the job settles.
     */
    function TesseractJob(instance) {
        _classCallCheck(this, TesseractJob);

        this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);

        this._instance = instance;
        this._resolve = [];
        this._reject = [];
        this._progress = [];
    }

    _createClass(TesseractJob, [{
        key: 'then',

        /**
         * Register a success callback, or call it at once with the stored
         * result when the job has already resolved.
         * @param {function} resolve - Called with the result data.
         * @param {function} [reject] - Registered through `catch` when given.
         * @returns {TesseractJob} This job.
         */
        value: function then(resolve, reject) {
            if (this._resolve.push) {
                this._resolve.push(resolve);
            } else {
                resolve(this._resolve);
            }

            if (reject) this.catch(reject);
            return this;
        }
    }, {
        key: 'catch',

        /**
         * Register a failure callback, or call it at once with the stored
         * error when the job has already been rejected.
         * @param {function} reject - Called with the error data.
         * @returns {TesseractJob} This job.
         */
        value: function _catch(reject) {
            if (this._reject.push) {
                this._reject.push(reject);
            } else {
                reject(this._reject);
            }
            return this;
        }
    }, {
        key: 'progress',

        /**
         * Register a progress callback.
         * @param {function} fn - Called with the data of every progress packet.
         * @returns {TesseractJob} This job.
         */
        value: function progress(fn) {
            this._progress.push(fn);
            return this;
        }
    }, {
        key: '_send',

        /**
         * Send this job's request through the adapter.
         * @param {string} action - Action name placed in the packet.
         * @param {Object} payload - Payload placed in the packet.
         */
        value: function _send(action, payload) {
            adapter.sendPacket(this._instance, {
                jobId: this.id,
                action: action,
                payload: payload
            });
        }
    }, {
        key: '_handle',

        /**
         * Dispatch a packet to the registered callbacks according to its
         * status. Resolve and reject store the data on the job and tell the
         * owner to start the next job.
         * @param {Object} packet - Message with `status` and `data`.
         */
        value: function _handle(packet) {
            var data = packet.data;
            if (packet.status === 'resolve') {
                if (this._resolve.length === 0) console.debug(data);
                this._resolve.forEach(function (fn) {
                    var ret = fn(data);
                    if (ret && typeof ret.then == 'function') {
                        console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
                    }
                });
                this._resolve = data;
                this._instance._dequeue();
            } else if (packet.status === 'reject') {
                if (this._reject.length === 0) console.error(data);
                this._reject.forEach(function (fn) {
                    return fn(data);
                });
                this._reject = data;
                this._instance._dequeue();
            } else if (packet.status === 'progress') {
                this._progress.forEach(function (fn) {
                    return fn(data);
                });
            } else {
                console.warn('Message type unknown', packet.status);
            }
        }
    }]);

    return TesseractJob;
}();
// The module's export is a ready-to-use instance built with the default
// options. The earlier `createWorker(...)` lines and the duplicated export
// statement were leftovers of a previous version and are removed.
var DefaultTesseract = create();

module.exports = DefaultTesseract;
},{"./common/circularize.js":2,"./node/index.js":1}]},{},[3])(3)
},{"./common/circularize.js":4,"./node/index.js":3,"object-assign":1}]},{},[5])(5)
});
{
"name": "tesseract.js",
"version": "1.0.1",
"version": "1.0.2",
"description": "Pure Javascript Multilingual OCR",

@@ -8,3 +8,3 @@ "main": "src/index.js",

"test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -o dist/tesseract.js --standalone Tesseract & watchify src/browser/worker.js -o dist/worker.js & http-server -p 7355",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js"

@@ -21,2 +21,3 @@ },

"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",

@@ -29,2 +30,3 @@ "watchify": "^3.7.0"

"level-js": "^2.2.4",
"object-assign": "^4.1.0",
"pako": "^1.0.3",

@@ -31,0 +33,0 @@ "png.js": "^0.2.1",

@@ -1,8 +0,4 @@

> # UNDER CONTRUCTION
> ## Due for Release on ~~Tuesday, Oct 4, 2016~~ Friday, Oct 7, 2016
> Sorry for the delay!
# [Tesseract.js](http://tesseract.projectnaptha.com/)
Tesseract.js is a javascript library that gets words in [almost any language](./tesseract_lang_list.md) out of images.
Tesseract.js is a javascript library that gets words in [almost any language](./tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))

@@ -12,7 +8,7 @@ <!-- Under the hood, Tesseract.js wraps [tesseract.js-core](https://github.com/naptha/tesseract.js-core), an [emscripten](https://github.com/kripken/emscripten) port of the [Tesseract OCR Engine](https://github.com/tesseract-ocr/tesseract).

![fancy demo gif](http://placehold.it/700x300 "jhgfjhgf")
[![fancy demo gif](./demo.gif "Demo")](http://tesseract.projectnaptha.com)
Tesseract.js works with script tags, webpack/browserify, and node. Once you're [set up](#installation), using it is as simple as
Tesseract.js works with script tags, webpack/browserify, and node. [After you install it](#installation), using it is as simple as
```javascript
Tesseract.recognize(my_image)
Tesseract.recognize(myImage)
.progress(function (p) { console.log('progress', p) })

@@ -28,18 +24,9 @@ .then(function (result) { console.log('result', result) })

## &lt;script/>
## &lt;script />
You can either include Tesseract.js on you page with a cdn like this:
You can simply include Tesseract.js with a cdn like this:
```html
<script src='https://cdn.rawgit.com/naptha/tesseract.js/a01d2a2/dist/tesseract.js'></script>
<script src='https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/tesseract.js'></script>
```
Or you can grab copies of `tesseract.js` and `tesseract.worker.js` from the [dist folder](https://github.com/naptha/tesseract.js/tree/master/dist) and include your local copies like this:
```html
<script src='/path/to/tesseract.js'></script>
<script>
Tesseract.workerUrl = 'http://www.absolute-path-to/tesseract.worker.js'
</script>
```
After including your scripts, the `Tesseract` variable should be defined! You can [head to the docs](#docs) for a full treatment of the API.

@@ -75,7 +62,7 @@

+ [TesseractJob.then(callback: function) -> TesseractJob](#tesseractjobthencallback-function---tesseractjob)
+ [TesseractJob.error(callback: function) -> TesseractJob](#tesseractjoberrorcallback-function---tesseractjob)
* [Tesseract Remote File Options](#tesseract-remote-file-options)
+ [Tesseract.coreUrl](#tesseractcoreurl)
+ [Tesseract.workerUrl](#tesseractworkerurl)
+ [Tesseract.langUrl](#tesseractlangurl)
+ [TesseractJob.catch(callback: function) -> TesseractJob](#tesseractjobcatchcallback-function---tesseractjob)
* [Tesseract Configuration](#tesseract-configuration)
+ [corePath](#corepath)
+ [workerPath](#workerpath)
+ [langPath](#langpath)
* [Contributing](#contributing)

@@ -90,11 +77,11 @@ + [Development](#development)

- `image` is any [ImageLike](#imagelike) object.
- `options` is an optional flat json object. `options` may:
- `options` is either absent (in which case it is interpreted as `'eng'`), a string specifying a language short code from the [language list](./tesseract_lang_list.md), or a flat JSON object that may:
+ include properties that override some subset of the [default tesseract parameters](./tesseract_parameters.md)
+ include a `lang` property with a value from the [list of lang parameters](./tesseract_lang_list.md)
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, and `error` methods can be used to act on the result.
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, and `catch` methods can be used to act on the result.
### Simple Example:
```javascript
Tesseract.recognize('#my-image')
Tesseract.recognize(myImage)
.then(function(result){

@@ -108,3 +95,3 @@ console.log(result)

// if we know our image is of spanish words without the letter 'e':
Tesseract.recognize('#my-image', {
Tesseract.recognize(myImage, {
lang: 'spa',

@@ -122,3 +109,5 @@ tessedit_char_blacklist: 'e'

## Tesseract.detect(image: [ImageLike](#imagelike)) -> [TesseractJob](#tesseractjob)
Figures out what script (e.g. 'Latin', 'Chinese') the words in image are written in.
- `image` is any [ImageLike](#imagelike) object.

@@ -130,3 +119,3 @@

```javascript
Tesseract.detect('#my-image')
Tesseract.detect(myImage)
.then(function(result){

@@ -139,20 +128,32 @@ console.log(result)

## ImageLike
The main Tesseract.js functions take an `image` parameter, which should be something that is 'image-like'.
That means `image` should be
- an `img` element or querySelector that matches an `img` element
- a `video` element or querySelector that matches a `video` element
- a `canvas` element or querySelector that matches a `canvas` element
The main Tesseract.js functions take an `image` parameter, which should be something that is like an image. What's considered "image-like" differs depending on whether it is being run from the browser or through NodeJS.
On a browser, an image can be:
- an `img`, `video`, or `canvas` element
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
- the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
- a `File` object (from a file `<input>` or drag-drop event)
- a `Blob` object
- a `ImageData` instance (an object containing `width`, `height` and `data` properties)
- a path or URL to an accessible image (the image must either be hosted locally or accessible by CORS)
In NodeJS, an image can be
- a path to a local image
- a `Buffer` instance containing a `PNG` or `JPEG` image
- a `ImageData` instance (an object containing `width`, `height` and `data` properties)
## TesseractJob
A TesseractJob is an an object returned by a call to recognize or detect.
All methods of a given TesseractJob return that TesseractJob to enable chaining.
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. One important difference is that these methods return the job itself (to enable chaining) rather than a new promise.
Typical use is:
```javascript
Tesseract.recognize('#my-image')
Tesseract.recognize(myImage)
.progress(function(message){console.log(message)})
.error(function(err){console.error(err)})
.catch(function(err){console.error(err)})
.then(function(result){console.log(result)})

@@ -163,7 +164,7 @@ ```

```javascript
var job1 = Tesseract.recognize('#my-image');
var job1 = Tesseract.recognize(myImage);
job1.progress(function(message){console.log(message)});
job1.error(function(err){console.error(err)});
job1.catch(function(err){console.error(err)});

@@ -181,4 +182,4 @@ job1.then(function(result){console.log(result)})

```javascript
Tesseract.recognize('#my-image')
.progress(function(message){console.log('progress is: 'message)})
Tesseract.recognize(myImage)
.progress(function(message){console.log('progress is: ', message)})
```

@@ -207,3 +208,3 @@

```javascript
Tesseract.recognize('#my-image')
Tesseract.recognize(myImage)
.then(function(result){console.log('result is: ', result)})

@@ -229,39 +230,26 @@ ```

### TesseractJob.error(callback: function) -> TesseractJob
### TesseractJob.catch(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if the job fails.
- `callback` is a function with the signature `callback(error)` where `error` is a JSON object.
## Tesseract Remote File Options
### Tesseract.coreUrl
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.rawgit.com/naptha/tesseract.js-core/master/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
## Tesseract Configuration
For example:
```javascript
Tesseract.coreUrl = 'https://absolute-path-to/tesseract.js-core/index.js'
window.Tesseract = Tesseract.create({
workerPath: '/path/to/worker.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
})
```
### Tesseract.workerUrl
### corePath
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
### workerPath
A string specifying the location of the [worker.js](./dist/worker.js) file, with default value 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file.
For example:
```javascript
Tesseract.workerUrl = 'https://absolute-path-to/tesseract.worker.js'
```
### langPath
A string specifying the location of the tesseract language files, with default value 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'. Language file urls are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files.
### Tesseract.langUrl
A string specifying the location of the tesseract language files, with default value 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'. Language file urls are calculated according to the formula `Tesseract.langUrl + lang + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files.
In the following exampple, Tesseract.js will download the language file from 'https://absolute-path-to/lang/folder/rus.traineddata.gz':
```javascript
Tesseract.langUrl = 'https://absolute-path-to/lang/folder/'
Tesseract.recognize('#my-im', {
lang: 'rus'
})
```
## Contributing

@@ -281,9 +269,10 @@ ### Development

tesseract.js@1.0.0 start /Users/guillermo/Desktop/code_static/tesseract.js
node devServer.js
Starting up http-server, serving ./
Available on:
http://127.0.0.1:7355
http://[your ip]:7355
Listening at http://localhost:7355
```
Then open `http://localhost:7355` in your favorite browser. The devServer automatically rebuilds tesseract.js and tesseract.worker.js when you change files in the src folder.
Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds tesseract.js and tesseract.worker.js when you change files in the src folder.

@@ -297,2 +286,2 @@ ### Building Static Files

### Send us a Pull Request!
Thanks :)
Thanks :)

@@ -1,9 +0,23 @@

exports.defaultOptions = {
var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
workerPath: 'dist/worker.js',
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
}
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration')
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js'
}
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var worker = new Worker(workerOptions.workerPath)
if(window.Blob && window.URL){
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'])
var worker = new Worker(window.URL.createObjectURL(blob));
}else{
var worker = new Worker(workerOptions.workerPath)
}
worker.onmessage = function(e){

@@ -33,4 +47,4 @@ var packet = e.data;

return loadImage(document.querySelector(image), cb)
}else{
// url or path
}else if(/(blob|data)\:/.test(image)){
// data url
var im = new Image

@@ -40,2 +54,15 @@ im.src = image;

return
}else{
var xhr = new XMLHttpRequest();
xhr.open('GET', image, true)
xhr.responseType = "blob";
xhr.onload = e => loadImage(xhr.response, cb);
xhr.onerror = function(e){
if(/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)){
console.debug('Attempting to load image with CORS proxy')
loadImage('https://crossorigin.me/' + image, cb)
}
}
xhr.send(null)
return
}

@@ -65,4 +92,7 @@ }else if(image instanceof File){

return loadImage(data, cb)
}else{
return cb(image)
}
cb(image)
throw new Error('Missing return in loadImage cascade')
}

@@ -1,2 +0,2 @@

var leveljs = require('level-js')
const leveljs = require('level-js')
var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2')

@@ -25,3 +25,3 @@

var ungzip = require('pako').ungzip;
const ungzip = require('pako').ungzip;

@@ -34,4 +34,4 @@ function fetchLanguageData(req, res, cb){

var xhr = new XMLHttpRequest();
xhr.open('GET', url, true);
xhr.responseType = 'arraybuffer';
xhr.open('GET', url, true);
xhr.onerror = e => {

@@ -38,0 +38,0 @@ xhr.onprogress = xhr.onload = null

@@ -1,4 +0,6 @@

"use strict";
const workerUtils = require('../common/worker.js')
var workerUtils = require('../common/worker.js')
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Worker')
}

@@ -12,5 +14,5 @@ global.addEventListener('message', function(e){

if(!global.TesseractCore){
res.progress({ status: 'loading tesseract core' })
importScripts(req.workerOptions.tesseractPath)
res.progress({ status: 'loaded tesseract core' })
res.progress({ status: 'loading tesseract core', progress: 0 })
importScripts(req.workerOptions.corePath)
res.progress({ status: 'loading tesseract core', progress: 1 })
}

@@ -17,0 +19,0 @@ return TesseractCore

@@ -38,3 +38,3 @@ var latestJob;

res.progress({ status: 'initializing tesseract api' })
res.progress({ status: 'initializing tesseract', progress: 0 })
Module = Core({

@@ -49,3 +49,3 @@ TOTAL_MEMORY: req.memory,

base = new Module.TessBaseAPI()
res.progress({ status: 'initialized tesseract api' })
res.progress({ status: 'initializing tesseract', progress: 1 })
}

@@ -79,3 +79,3 @@ }

Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
res.progress({ status: 'loaded ' + lang + '.traineddata' })
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 })
Module._loadedLanguages[lang] = true;

@@ -94,5 +94,7 @@ cb()

res.progress({ status: 'initializing api', progress: 0 })
base.Init(null, lang)
res.progress({ status: 'initialized with language' })
res.progress({ status: 'initializing api', progress: 0.3 })
var options = req.options;
for (var option in options) {

@@ -104,3 +106,6 @@ if (options.hasOwnProperty(option)) {

res.progress({ status: 'initializing api', progress: 0.6 })
var ptr = setImage(Module, base, req.image);
res.progress({ status: 'initializing api', progress: 1 })
base.Recognize(null)

@@ -107,0 +112,0 @@

@@ -1,9 +0,10 @@

"use strict";
const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js')
const objectAssign = require('object-assign');
var adapter = require('./node/index.js')
var circularize = require('./common/circularize.js')
function createWorker(workerOptions){
return new TesseractWorker(workerOptions)
function create(workerOptions){
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
worker.create = create;
return worker;
}

@@ -21,4 +22,9 @@

return this._delay(job => {
options = options || {}
options.lang = options.lang || 'eng';
if(typeof options === 'string'){
options = { lang: options };
}else{
options = options || {}
options.lang = options.lang || 'eng';
}
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions })

@@ -141,6 +147,5 @@ })

var DefaultTesseract = createWorker(adapter.defaultOptions)
DefaultTesseract.createWorker = createWorker;
var DefaultTesseract = create()
module.exports = DefaultTesseract

@@ -1,2 +0,2 @@

var path = require('path')
const path = require('path')

@@ -8,4 +8,4 @@ exports.defaultOptions = {

var fork = require('child_process').fork;
var fs = require('fs')
const fork = require('child_process').fork;
const fs = require('fs')

@@ -33,2 +33,3 @@ exports.spawnWorker = function spawnWorker(instance, workerOptions){

function loadImage(image, cb){
// TODO: support URLs
if(typeof image === 'string'){

@@ -43,3 +44,2 @@ fs.readFile(image, function(err, buffer){

if(mime === 'image/png'){

@@ -46,0 +46,0 @@ var PNGReader = require('png.js');

@@ -1,5 +0,5 @@

var http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");
const http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");

@@ -6,0 +6,0 @@ var langdata = require('../common/langdata.json')

@@ -1,5 +0,3 @@

"use strict";
const workerUtils = require('../common/worker.js')
var workerUtils = require('../common/worker.js')
process.on('message', function(packet){

@@ -6,0 +4,0 @@ workerUtils.dispatchHandlers(packet, obj => process.send(obj))

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc