Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

tesseract.js

Package Overview
Dependencies
Maintainers
3
Versions
68
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tesseract.js - npm Package Compare versions

Comparing version 2.0.0-alpha.4 to 2.0.0-alpha.6

.nyc_output/7954ef20-a9ce-4dc9-b162-86a6b44eaa15.json

54

docs/examples.md

@@ -59,3 +59,3 @@ # Tesseract.js Examples

### with whitelist chars (^2.0.0-alpha.4)
### with whitelist char (^2.0.0-alpha.5)

@@ -75,3 +75,3 @@ Sadly, whitelist chars are not supported in Tesseract v4, so in tesseract.js we need to switch to Tesseract v3 mode to make it work.

{
'init_oem': OEM.TESSERACT_ONLY,
'tessedit_ocr_engine_mode': OEM.TESSERACT_ONLY,
'tessedit_char_whitelist': '0123456789-.',

@@ -87,1 +87,51 @@ }

```
### with different pageseg mode (^2.0.0-alpha.5)
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163
```javascript
import Tesseract from 'tesseract.js';
const { TesseractWorker, PSM } = Tesseract;
const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'eng',
{
'tessedit_pageseg_mode': PSM.SINGLE_BLOCK,
}
)
.progress((p) => {
console.log('progress', p);
})
.then((result) => {
console.log(result);
});
```
### with pdf output (^2.0.0-alpha.5)
```javascript
import Tesseract from 'tesseract.js';
const { TesseractWorker } = Tesseract;
const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'eng',
{
'tessedit_create_pdf': '1',
}
)
.progress((p) => {
console.log('progress', p);
})
.then((result) => {
console.log(result);
});
```

4

docs/local-installation.md

@@ -13,5 +13,5 @@ ## Local Installation

const worker = Tesseract.TesseractWorker({
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0-alpha.4/dist/worker.min.js',
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0-alpha.6/dist/worker.min.js',
langPath: 'https://tessdata.projectnaptha.com/4.0.0',
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.wasm.js',
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.wasm.js',
});

@@ -18,0 +18,0 @@ ```

{
"name": "tesseract.js",
"version": "2.0.0-alpha.4",
"version": "2.0.0-alpha.6",
"description": "Pure Javascript Multilingual OCR",

@@ -50,3 +50,3 @@ "main": "src/index.js",

"resolve-url": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.8",
"tesseract.js-core": "^2.0.0-beta.10",
"tesseract.js-utils": "^1.0.0-beta.5"

@@ -53,0 +53,0 @@ },

@@ -42,3 +42,3 @@ # [Tesseract.js](http://tesseract.projectnaptha.com/)

```html
<script src='https://unpkg.com/tesseract.js@v2.0.0-alpha.4/dist/tesseract.min.js'></script>
<script src='https://unpkg.com/tesseract.js@v2.0.0-alpha.6/dist/tesseract.min.js'></script>
```

@@ -45,0 +45,0 @@

@@ -74,2 +74,21 @@ /**

/**
 * downloadFile
 *
 * @name downloadFile
 * @function offer a blob to the user as a file download
 * @access private
 * @param {string} path - file name suggested for the download
 * @param {Blob} blob - content of the file
 */
const downloadFile = (path, blob) => {
  // IE 10+
  if (navigator.msSaveBlob) {
    navigator.msSaveBlob(blob, path);
    return;
  }
  const link = document.createElement('a');
  // Only browsers that support the HTML5 download attribute can continue
  if (link.download === undefined) {
    return;
  }
  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', path);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the object URL so the blob can be garbage collected; deferred
  // to a later task so the click above has been dispatched first.
  setTimeout(() => URL.revokeObjectURL(url), 0);
};
/*

@@ -87,3 +106,3 @@ * Default options for browser worker

*/
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};

@@ -113,3 +132,8 @@

worker.onmessage = ({ data }) => {
instance.recv(data);
if (data.jobId.startsWith('Job')) {
instance.recv(data);
} else if (data.jobId.startsWith('Download')) {
const { path, blob } = data;
downloadFile(path, blob);
}
};

@@ -116,0 +140,0 @@

@@ -45,2 +45,11 @@ /**

},
b64toU8Array: s => new Uint8Array(atob(s).split('').map(c => c.charCodeAt(0))),
writeFile: (path, data, type) => {
const blob = new Blob([data], { type });
self.postMessage({
jobId: 'Download',
path,
blob,
});
},
});

@@ -0,1 +1,3 @@

const { OEM, PSM } = require('./types');
module.exports = {

@@ -10,2 +12,14 @@ defaultOptions: {

},
/*
* default params for recognize()
*/
defaultParams: {
tessedit_ocr_engine_mode: OEM.TESSERACT_LSTM_COMBINED,
tessedit_pageseg_mode: PSM.SINGLE_BLOCK,
tessedit_char_whiltelist: '',
tessedit_create_pdf: '0',
textonly_pdf: '0',
pdf_name: 'tesseract.js-ocr-result',
pdf_title: 'Tesseract.js OCR Result',
},
};

@@ -5,3 +5,3 @@ module.exports = {

*
* By default tesseract.js uses DEFAULT mode, which uses LSTM when possible.
* By default tesseract.js uses TESSERACT_LSTM_COMBINED mode, which uses LSTM when possible.
* If you need to use some tesseract v3 features (like tessedit_char_whitelist),

@@ -18,2 +18,21 @@ * you need to use TESSERACT_ONLY mode.

},
/*
* PSM = Page Segmentation Mode
*/
PSM: {
OSD_ONLY: '0',
AUTO_OSD: '1',
AUTO_ONLY: '2',
AUTO: '3',
SINGLE_COLUMN: '4',
SINGLE_BLOCK_VERT_TEXT: '5',
SINGLE_BLOCK: '6',
SINGLE_LINE: '7',
SINGLE_WORD: '8',
SINGLE_CHAR: '9',
SPARSE_TEXT: '10',
SPARSE_TEXT_OSD: '11',
RAW_LINE: '12',
COUNT: '13',
},
};

@@ -12,3 +12,5 @@ /**

const check = require('check-types');
const pdfTTF = require('./pdf-ttf');
const dump = require('./dump');
const { defaultParams } = require('./options');

@@ -56,2 +58,54 @@ /*

/**
 * handleParams
 *
 * @name handleParams
 * @function initialise the api and apply params from users
 * @access private
 * @param {string} lang - lang string for Init()
 * @param {object} customParams - an object of params, merged over defaultParams
 */
const handleParams = (lang, customParams) => {
  const merged = { ...defaultParams, ...customParams };
  // The engine mode is an argument of Init(); everything else is a variable.
  const { tessedit_ocr_engine_mode: engineMode, ...variables } = merged;
  api.Init(null, lang, engineMode);
  for (const [name, value] of Object.entries(variables)) {
    api.SetVariable(name, value);
  }
};
/**
 * handleOutput
 *
 * @name handleOutput
 * @function handle file output
 * @access private
 * @param {object} customParams - an object of params, merged over defaultParams
 *
 * When tessedit_create_pdf is '1', renders the current api state to
 * `/<pdf_name>.pdf` in the module file system and passes its content to
 * adapter.writeFile() as `<pdf_name>.pdf`. Does nothing otherwise.
 */
const handleOutput = (customParams) => {
  const {
    tessedit_create_pdf,
    textonly_pdf,
    pdf_name,
    pdf_title,
  } = {
    ...defaultParams,
    ...customParams,
  };
  if (tessedit_create_pdf !== '1') {
    return;
  }
  const pdfRenderer = new TessModule.TessPDFRenderer(pdf_name, '/', textonly_pdf === '1');
  try {
    pdfRenderer.BeginDocument(pdf_title);
    pdfRenderer.AddImage(api);
    pdfRenderer.EndDocument();
    adapter.writeFile(`${pdf_name}.pdf`, TessModule.FS.readFile(`/${pdf_name}.pdf`), 'application/pdf');
  } finally {
    // Always release the renderer, even when rendering or writing throws.
    TessModule._free(pdfRenderer);
  }
};
/**
* handleInit

@@ -80,2 +134,3 @@ *

TessModule = tessModule;
TessModule.FS.writeFile('/pdf.ttf', adapter.b64toU8Array(pdfTTF));
api = new TessModule.TessBaseAPI();

@@ -129,5 +184,2 @@ res.progress({ status: 'initialized tesseract', progress: 1 });

.then(() => {
const OEM = check.undefined(params['init_oem'])
? TessModule.OEM_DEFAULT
: params['init_oem'];
const progressUpdate = (progress) => {

@@ -137,11 +189,8 @@ res.progress({ status: 'initializing api', progress });

progressUpdate(0);
api.Init(null, lang, OEM);
progressUpdate(0.3);
Object.keys(params).filter(key => !key.startsWith('init_')).forEach((key) => {
api.SetVariable(key, params[key]);
});
progressUpdate(0.6);
handleParams(lang, params);
progressUpdate(0.5);
const ptr = setImage(image);
progressUpdate(1);
api.Recognize(null);
handleOutput(params);
const result = dump(TessModule, api);

@@ -148,0 +197,0 @@ api.End();

@@ -12,3 +12,3 @@ /**

const TesseractWorker = require('./common/TesseractWorker');
const { OEM } = require('./common/types');
const types = require('./common/types');

@@ -21,3 +21,3 @@ module.exports = {

/** Check ./common/types for more details */
OEM,
...types,
};

@@ -36,2 +36,9 @@ /**

},
b64toU8Array: s => Buffer.from(s, 'base64'),
writeFile: (path, data) => {
const fs = require('fs');
fs.writeFile(path, data, () => {
console.log('File Write Succeeded!');
});
},
});

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc