tesseract.js
Advanced tools
Comparing version 2.0.0-alpha.3 to 2.0.0-alpha.4
@@ -16,3 +16,3 @@ # API | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
worker | ||
@@ -27,3 +27,3 @@ .recognize(myImage) | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
// if we know our image is of spanish words without the letter 'e': | ||
@@ -48,3 +48,3 @@ worker | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
worker | ||
@@ -63,3 +63,3 @@ .detect(myImage) | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
worker.recognize(myImage) | ||
@@ -74,3 +74,3 @@ .progress(message => console.log(message)) | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
const job1 = worker.recognize(myImage); | ||
@@ -95,3 +95,3 @@ | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
worker.recognize(myImage) | ||
@@ -122,3 +122,3 @@ .progress(function(message){console.log('progress is: ', message)}); | ||
```javascript | ||
const worker = new Tessearct.TesseractWorker(); | ||
const worker = new Tesseract.TesseractWorker(); | ||
worker.recognize(myImage) | ||
@@ -125,0 +125,0 @@ .then(function(result){console.log('result is: ', result)}); |
@@ -11,3 +11,3 @@ # Tesseract.js Examples | ||
const { TesseractWorker } = Tesseract; | ||
const worker = new TessearctWorker(); | ||
const worker = new TesseractWorker(); | ||
@@ -27,3 +27,3 @@ worker | ||
const { TesseractWorker } = Tesseract; | ||
const worker = new TessearctWorker(); | ||
const worker = new TesseractWorker(); | ||
@@ -40,3 +40,3 @@ worker | ||
### with multiple languages (separate by '+'') | ||
### with multiple languages, separate by '+' | ||
@@ -47,3 +47,3 @@ ```javascript | ||
const { TesseractWorker } = Tesseract; | ||
const worker = new TessearctWorker(); | ||
const worker = new TesseractWorker(); | ||
@@ -62,1 +62,28 @@ worker | ||
``` | ||
### with whitelist chars (^2.0.0-alpha.4) | ||
Sadly, whitelist chars are not supported in tesseract v4, so in tesseract.js we need to switch to tesseract v3 mode to make it work. | ||
```javascript | ||
import Tesseract from 'tesseract.js'; | ||
const { TesseractWorker, OEM } = Tesseract; | ||
const worker = new TesseractWorker(); | ||
worker | ||
.recognize( | ||
'http://jeroen.github.io/images/testocr.png', | ||
'eng', | ||
{ | ||
'init_oem': OEM.TESSERACT_ONLY, | ||
'tessedit_char_whitelist': '0123456789-.', | ||
} | ||
) | ||
.progress((p) => { | ||
console.log('progress', p); | ||
}) | ||
.then((result) => { | ||
console.log(result); | ||
}); | ||
``` |
## Local Installation | ||
Check here for an example: https://github.com/jeromewu/tesseract.js-offline | ||
In browser environment, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. | ||
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can pass extra arguments to `TessearctWorker` to specify custom paths for workers, languages, and core. | ||
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can pass extra arguments to `TesseractWorker` to specify custom paths for workers, languages, and core. | ||
@@ -11,5 +13,5 @@ In Node.js environment, the only path you may want to customize is languages/langPath. | ||
const worker = Tesseract.TesseractWorker({ | ||
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0-alpha.3/dist/worker.min.js', | ||
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0-alpha.4/dist/worker.min.js', | ||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', | ||
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.5/tesseract-core.js', | ||
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.wasm.js', | ||
}); | ||
@@ -25,2 +27,4 @@ ``` | ||
### corePath | ||
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@v2.0.0-beta.5/tesseract-core.js'. | ||
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available). | ||
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.js' which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.wasm'. But it fails to fetch at this moment. |
{ | ||
"name": "tesseract.js", | ||
"version": "2.0.0-alpha.3", | ||
"version": "2.0.0-alpha.4", | ||
"description": "Pure Javascript Multilingual OCR", | ||
@@ -10,4 +10,9 @@ "main": "src/index.js", | ||
"prepublishOnly": "npm run build", | ||
"test": "npm run test:node", | ||
"wait": "wait-on http://localhost:3000/package.json", | ||
"test": "npm-run-all -p -r start test:all", | ||
"test:all": "npm-run-all wait test:browser:* test:node", | ||
"test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js", | ||
"test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -t 300000", | ||
"test:browser:detect": "npm run test:browser-tpl -- -f ./tests/browser/detect.test.html", | ||
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/browser/recognize.test.html", | ||
"lint": "eslint src" | ||
@@ -24,2 +29,3 @@ }, | ||
"devDependencies": { | ||
"cors": "^2.8.5", | ||
"eslint": "^5.9.0", | ||
@@ -33,3 +39,6 @@ "eslint-config-airbnb": "^17.1.0", | ||
"mocha": "^5.2.0", | ||
"mocha-headless-chrome": "^2.0.2", | ||
"npm-run-all": "^4.1.5", | ||
"nyc": "^13.1.0", | ||
"wait-on": "^3.2.0", | ||
"webpack": "^4.26.0", | ||
@@ -44,3 +53,3 @@ "webpack-cli": "^3.1.2", | ||
"resolve-url": "^0.2.1", | ||
"tesseract.js-core": "^2.0.0-beta.5", | ||
"tesseract.js-core": "^2.0.0-beta.8", | ||
"tesseract.js-utils": "^1.0.0-beta.5" | ||
@@ -47,0 +56,0 @@ }, |
@@ -12,7 +12,7 @@ # [Tesseract.js](http://tesseract.projectnaptha.com/) | ||
**Tessearct.js v2 is now available and under development in master branch, check [support/1.x](https://github.com/naptha/tesseract.js/tree/support/1.x) branch for v1.** | ||
**Tesseract.js v2 is now available and under development in master branch, check [support/1.x](https://github.com/naptha/tesseract.js/tree/support/1.x) branch for v1.** | ||
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) | ||
[![fancy demo gif](./docs/demo.gif)](http://tesseract.projectnaptha.com) | ||
[![fancy demo gif](./docs/images/demo.gif)](http://tesseract.projectnaptha.com) | ||
@@ -43,3 +43,3 @@ Tesseract.js works with script tags, [webpack](https://webpack.js.org/), and [Node.js](https://nodejs.org/en/). [After you install it](#installation), using it is as simple as | ||
```html | ||
<script src='https://unpkg.com/tesseract.js@v2.0.0-alpha.3/dist/tesseract.min.js'></script> | ||
<script src='https://unpkg.com/tesseract.js@v2.0.0-alpha.4/dist/tesseract.min.js'></script> | ||
``` | ||
@@ -85,2 +85,3 @@ | ||
* [Local Installation](./docs/local-installation.md) | ||
* [FAQ](./docs/faq.md) | ||
@@ -87,0 +88,0 @@ # Contributing |
@@ -5,2 +5,3 @@ const webpack = require('webpack'); | ||
const path = require('path'); | ||
const cors = require('cors'); | ||
const webpackConfig = require('./webpack.config.dev'); | ||
@@ -11,2 +12,3 @@ | ||
app.use(cors()); | ||
app.use('/', express.static(path.resolve(__dirname, '..'))); | ||
@@ -13,0 +15,0 @@ app.use(middleware(compiler, { publicPath: '/dist' })); |
@@ -1,26 +0,3 @@ | ||
const express = require('express'); | ||
const path = require('path'); | ||
global.expect = require('expect.js'); | ||
global.fetch = require('node-fetch'); | ||
global.Tesseract = require('../src'); | ||
const app = express(); | ||
let devServer = null; | ||
/**
 * Start the shared static-file server used by the tests, if it is not
 * already running.
 *
 * On first use, the directory one level above this script is mounted at '/'
 * and the app starts listening on port 3000; `done` is handed to
 * `app.listen` as its callback. When a server handle already exists,
 * `done` is invoked immediately and nothing else happens.
 *
 * @param {Function} done - callback signalling that the server is available
 */
global.startServer = (done) => {
  // A handle is already stored: nothing to start.
  if (devServer !== null) {
    done();
    return;
  }

  const projectRoot = path.resolve(__dirname, '..');
  app.use('/', express.static(projectRoot));
  devServer = app.listen(3000, done);
};
/**
 * Stop the shared static-file server if one is currently stored.
 *
 * With a stored handle, `close(done)` is called on it and the handle is
 * reset to `null` right afterwards. Without a stored handle, `done` is
 * invoked immediately.
 *
 * @param {Function} done - callback passed to `close`, or called directly
 *   when there is no server to stop
 */
global.stopServer = (done) => {
  // No handle stored: nothing to close.
  if (devServer === null) {
    done();
    return;
  }

  devServer.close(done);
  devServer = null;
};
@@ -86,3 +86,3 @@ /** | ||
*/ | ||
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.5/tesseract-core${typeof WebAssembly === 'object' ? '' : '.asm'}.js`, | ||
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`, | ||
}; | ||
@@ -102,5 +102,7 @@ | ||
let worker; | ||
if (window.Blob && window.URL) { | ||
const blob = new Blob([`importScripts("${workerPath}");`]); | ||
worker = new Worker(window.URL.createObjectURL(blob)); | ||
if (Blob && URL) { | ||
const blob = new Blob([`importScripts("${workerPath}");`], { | ||
type: 'application/javascript', | ||
}); | ||
worker = new Worker(URL.createObjectURL(blob)); | ||
} else { | ||
@@ -107,0 +109,0 @@ worker = new Worker(workerPath); |
@@ -34,3 +34,9 @@ /** | ||
*/ | ||
global.TesseractCore = typeof WebAssembly === 'object' ? global.TesseractCoreWASM : global.TesseractCoreASM; | ||
if (check.not.undefined(global.TesseractCoreWASM) && typeof WebAssembly === 'object') { | ||
global.TesseractCore = global.TesseractCoreWASM; | ||
} else if (check.not.undefined(global.TesseractCoreASM)){ | ||
global.TesseractCore = global.TesseractCoreASM; | ||
} else { | ||
throw Error('Failed to load TesseractCore'); | ||
} | ||
res.progress({ status: 'loading tesseract core', progress: 1 }); | ||
@@ -37,0 +43,0 @@ } |
@@ -11,2 +11,3 @@ /** | ||
const { readImage, loadLang } = require('tesseract.js-utils'); | ||
const check = require('check-types'); | ||
const dump = require('./dump'); | ||
@@ -126,2 +127,5 @@ | ||
.then(() => { | ||
const OEM = check.undefined(params['init_oem']) | ||
? TessModule.OEM_DEFAULT | ||
: params['init_oem']; | ||
const progressUpdate = (progress) => { | ||
@@ -131,5 +135,5 @@ res.progress({ status: 'initializing api', progress }); | ||
progressUpdate(0); | ||
api.Init(null, lang); | ||
api.Init(null, lang, OEM); | ||
progressUpdate(0.3); | ||
Object.keys(params).forEach((key) => { | ||
Object.keys(params).filter(key => !key.startsWith('init_')).forEach((key) => { | ||
api.SetVariable(key, params[key]); | ||
@@ -136,0 +140,0 @@ }); |
@@ -12,2 +12,3 @@ /** | ||
const TesseractWorker = require('./common/TesseractWorker'); | ||
const { OEM } = require('./common/types'); | ||
@@ -19,2 +20,4 @@ module.exports = { | ||
utils, | ||
/** Check ./common/types for more details */ | ||
OEM, | ||
}; |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1346961
51
1933
115
16