Socket
Socket
Sign inDemoInstall

tesseract.js

Package Overview
Dependencies
22
Maintainers
3
Versions
67
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 2.0.0-alpha.11 to 2.0.0-alpha.12

.gitpod.yml

41

docs/examples.md

@@ -8,3 +8,4 @@ # Tesseract.js Examples

- Offline version: https://github.com/jeromewu/tesseract.js-offline
- With Vue (similar with React/Angular): https://github.com/jeromewu/tesseract-vue-app
- With Vue: https://github.com/jeromewu/tesseract.js-vue-app
- With Angular: https://github.com/jeromewu/tesseract.js-angular-app
- Chrome Extension: https://github.com/jeromewu/tesseract.js-chrome-extension

@@ -127,3 +128,3 @@

### with pdf output (^2.0.0-alpha.7)
### with pdf output (^2.0.0-alpha.12)

@@ -143,3 +144,3 @@ In this example, pdf file will be downloaded in browser and write to file system in Node.js

{
'tessedit_create_pdf': '1',
'tessjs_create_pdf': '1',
}

@@ -169,5 +170,5 @@ )

{
'tessedit_create_pdf': '1',
'pdf_auto_download': false, // disable auto download
'pdf_bin': true, // add pdf file bin array in result
'tessjs_create_pdf': '1',
'tessjs_pdf_auto_download': false, // disable auto download
'tessjs_pdf_bin': true, // add pdf file bin array in result
}

@@ -204,1 +205,29 @@ )

```
### with only part of the image (^2.0.0-alpha.12)
```javascript
// Example: run recognition on only part of an image by passing the
// tessjs_image_rectangle_* parameters to worker.recognize().
import Tesseract from 'tesseract.js';
const { TesseractWorker } = Tesseract;
const worker = new TesseractWorker();
worker
.recognize(
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng',
{
// Sub-rectangle to recognize: starts at left 0 / top 0, 500 wide by 250 high.
tessjs_image_rectangle_left: 0,
tessjs_image_rectangle_top: 0,
tessjs_image_rectangle_width: 500,
tessjs_image_rectangle_height: 250,
}
)
// Log every progress update reported while the job runs.
.progress((p) => {
console.log('progress', p);
})
.then(({ text }) => {
console.log(text);
// Shut the worker down once the recognized text has been printed.
worker.terminate();
});
```

25

docs/tesseract_parameters.md

@@ -27,12 +27,15 @@ Tesseract.js Parameters

| tessedit\_char\_whitelist | string | '' | setting whitelist characters makes the result contain only these characters; useful when the content of the image is limited |
| tessedit\_create\_pdf | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js generates a pdf output |
| tessedit\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result |
| tessedit\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result |
| tessedit\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result |
| tessedit\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result |
| tessedit\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result |
| pdf\_name | string | 'tesseract.js-ocr-result' | the name of the generated pdf file |
| pdf\_title | string | 'Tesseract.js OCR Result' | the title of the generated pdf file |
| pdf\_auto\_download | boolean | true | If the value is true, tesseract.js will automatic download/writeFile pdf file |
| pdf\_bin | boolean | false | whether to include pdf binary array in the result object (result.files.pdf) |
| tessjs\_create\_pdf | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js generates a pdf output |
| tessjs\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result |
| tessjs\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result |
| tessjs\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result |
| tessjs\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result |
| tessjs\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result |
| tessjs\_pdf\_name | string | 'tesseract.js-ocr-result' | the name of the generated pdf file |
| tessjs\_pdf\_title | string | 'Tesseract.js OCR Result' | the title of the generated pdf file |
| tessjs\_pdf\_auto\_download | boolean | true | If the value is true, tesseract.js will automatically download the pdf file (browser) or write it to the file system (Node.js) |
| tessjs\_pdf\_bin | boolean | false | whether to include pdf binary array in the result object (result.files.pdf) |
| tessjs\_image\_rectangle\_left | number | 0 | The left of the sub-rectangle of the image. |
| tessjs\_image\_rectangle\_top | number | 0 | The top of the sub-rectangle of the image. |
| tessjs\_image\_rectangle\_width | number | -1 | The width of the sub-rectangle of the image; a negative value (default -1) means the full image width is used |
| tessjs\_image\_rectangle\_height | number | -1 | The height of the sub-rectangle of the image; a negative value (default -1) means the full image height is used |
{
"name": "tesseract.js",
"version": "2.0.0-alpha.11",
"version": "2.0.0-alpha.12",
"description": "Pure Javascript Multilingual OCR",

@@ -19,3 +19,4 @@ "main": "src/index.js",

"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html",
"lint": "eslint src"
"lint": "eslint src",
"postinstall": "opencollective-postinstall || true"
},

@@ -58,2 +59,3 @@ "browser": {

"node-fetch": "^2.3.0",
"opencollective-postinstall": "^2.0.2",
"resolve-url": "^0.2.1",

@@ -70,3 +72,7 @@ "tesseract.js-core": "^2.0.0-beta.10",

},
"homepage": "https://github.com/naptha/tesseract.js"
"homepage": "https://github.com/naptha/tesseract.js",
"collective": {
"type": "opencollective",
"url": "https://opencollective.com/tesseractjs"
}
}

@@ -6,3 +6,3 @@ <p align="center">

[![Build Status](https://travis-ci.org/naptha/tesseract.js.svg?branch=master)](https://travis-ci.org/naptha/tesseract.js)
[![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js)
[![Financial Contributors on Open Collective](https://opencollective.com/tesseractjs/all/badge.svg?label=financial+contributors)](https://opencollective.com/tesseractjs) [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/naptha/tesseract.js/graphs/commit-activity)

@@ -26,3 +26,3 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine.
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [#CDN](CDN) and on the server with [Node.js](https://nodejs.org/en/).
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [CDN](#CDN) and on the server with [Node.js](https://nodejs.org/en/).
After you [install it](#installation), using it is as simple as:

@@ -49,2 +49,3 @@

- Supported image formats: png, jpg, bmp, pbm
- Supports WebAssembly (falls back to ASM.js when the browser doesn't support it)

@@ -110,2 +111,6 @@

You can also run the development server in Gitpod (a free online IDE and dev environment for GitHub that will automate your dev setup) with a single click.
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/naptha/tesseract.js/blob/master/examples/browser/demo.html)
### Building Static Files

@@ -117,1 +122,31 @@ To build the compiled static files just execute the following:

This will output the files into the `dist` directory.
## Contributors
### Code Contributors
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)].
<a href="https://github.com/naptha/tesseract.js/graphs/contributors"><img src="https://opencollective.com/tesseractjs/contributors.svg?width=890&button=false" /></a>
### Financial Contributors
Become a financial contributor and help us sustain our community. [[Contribute](https://opencollective.com/tesseractjs/contribute)]
#### Individuals
<a href="https://opencollective.com/tesseractjs"><img src="https://opencollective.com/tesseractjs/individuals.svg?width=890"></a>
#### Organizations
Support this project with your organization. Your logo will show up here with a link to your website. [[Contribute](https://opencollective.com/tesseractjs/contribute)]
<a href="https://opencollective.com/tesseractjs/organization/0/website"><img src="https://opencollective.com/tesseractjs/organization/0/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/1/website"><img src="https://opencollective.com/tesseractjs/organization/1/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/2/website"><img src="https://opencollective.com/tesseractjs/organization/2/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/3/website"><img src="https://opencollective.com/tesseractjs/organization/3/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/4/website"><img src="https://opencollective.com/tesseractjs/organization/4/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/5/website"><img src="https://opencollective.com/tesseractjs/organization/5/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/6/website"><img src="https://opencollective.com/tesseractjs/organization/6/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/7/website"><img src="https://opencollective.com/tesseractjs/organization/7/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/8/website"><img src="https://opencollective.com/tesseractjs/organization/8/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/9/website"><img src="https://opencollective.com/tesseractjs/organization/9/avatar.svg"></a>

@@ -23,2 +23,5 @@ const path = require('path');

],
devServer: {
allowedHosts: ['localhost', '.gitpod.io'],
},
});

@@ -25,0 +28,0 @@

@@ -46,7 +46,7 @@ /**

module.exports = (TessModule, api, {
tessedit_create_hocr,
tessedit_create_tsv,
tessedit_create_box,
tessedit_create_unlv,
tessedit_create_osd,
tessjs_create_hocr,
tessjs_create_tsv,
tessjs_create_box,
tessjs_create_unlv,
tessjs_create_osd,
}) => {

@@ -187,7 +187,7 @@ const ri = api.GetIterator();

text: api.GetUTF8Text(),
hocr: tessedit_create_hocr === '1' ? deindent(api.GetHOCRText()) : null,
tsv: tessedit_create_tsv === '1' ? api.GetTSVText() : null,
box: tessedit_create_box === '1' ? api.GetBoxText() : null,
unlv: tessedit_create_unlv === '1' ? api.GetUNLVText() : null,
osd: tessedit_create_osd === '1' ? api.GetOsdText() : null,
hocr: tessjs_create_hocr === '1' ? deindent(api.GetHOCRText()) : null,
tsv: tessjs_create_tsv === '1' ? api.GetTSVText() : null,
box: tessjs_create_box === '1' ? api.GetBoxText() : null,
unlv: tessjs_create_unlv === '1' ? api.GetUNLVText() : null,
osd: tessjs_create_osd === '1' ? api.GetOsdText() : null,
confidence: api.MeanTextConf(),

@@ -194,0 +194,0 @@ blocks,

@@ -19,14 +19,18 @@ const { OEM, PSM } = require('./types');

tessedit_char_whiltelist: '',
tessedit_create_pdf: '0',
tessedit_create_hocr: '1',
tessedit_create_tsv: '1',
tessedit_create_box: '0',
tessedit_create_unlv: '0',
tessedit_create_osd: '0',
textonly_pdf: '0',
pdf_name: 'tesseract.js-ocr-result',
pdf_title: 'Tesseract.js OCR Result',
pdf_auto_download: true,
pdf_bin: false,
tessjs_create_pdf: '0',
tessjs_create_hocr: '1',
tessjs_create_tsv: '1',
tessjs_create_box: '0',
tessjs_create_unlv: '0',
tessjs_create_osd: '0',
tessjs_textonly_pdf: '0',
tessjs_pdf_name: 'tesseract.js-ocr-result',
tessjs_pdf_title: 'Tesseract.js OCR Result',
tessjs_pdf_auto_download: true,
tessjs_pdf_bin: false,
tessjs_image_rectangle_left: 0,
tessjs_image_rectangle_top: 0,
tessjs_image_rectangle_width: -1,
tessjs_image_rectangle_height: -1,
},
};

@@ -51,10 +51,10 @@ /**

then(resolve, reject) {
if (this._resolve.push) {
this._resolve.push(resolve);
} else {
resolve(this._resolve);
}
if (reject) this.catch(reject);
return this;
return new Promise((res, rej) => {
if (!this._resolve.push) {
res(this._result);
} else {
this._resolve.push(res);
}
this.catch(rej);
}).then(resolve, reject);
}

@@ -61,0 +61,0 @@

@@ -35,4 +35,10 @@ /**

*/
const setImage = (image) => {
const setImage = (image, params) => {
const {
tessjs_image_rectangle_left: left,
tessjs_image_rectangle_top: top,
tessjs_image_rectangle_width: width,
tessjs_image_rectangle_height: height,
} = params;
const {
w, h, bytesPerPixel, data, pix,

@@ -52,3 +58,8 @@ } = readImage(TessModule, Array.from(image));

}
api.SetRectangle(0, 0, w, h);
api.SetRectangle(
(left < 0) ? 0 : left,
(top < 0) ? 0 : top,
(width < 0) ? w : width,
(height < 0) ? h : height,
);
return data === null ? pix : data;

@@ -79,3 +90,5 @@ };

Object.keys(params).forEach((key) => {
api.SetVariable(key, params[key]);
if (!key.startsWith('tessjs')) {
api.SetVariable(key, params[key]);
}
});

@@ -95,8 +108,8 @@ };

const {
tessedit_create_pdf,
textonly_pdf,
pdf_name,
pdf_title,
pdf_auto_download,
pdf_bin,
tessjs_create_pdf,
tessjs_textonly_pdf,
tessjs_pdf_name,
tessjs_pdf_title,
tessjs_pdf_auto_download,
tessjs_pdf_bin,
} = {

@@ -107,5 +120,5 @@ ...defaultParams,

if (tessedit_create_pdf === '1') {
const pdfRenderer = new TessModule.TessPDFRenderer(pdf_name, '/', textonly_pdf === '1');
pdfRenderer.BeginDocument(pdf_title);
if (tessjs_create_pdf === '1') {
const pdfRenderer = new TessModule.TessPDFRenderer(tessjs_pdf_name, '/', tessjs_textonly_pdf === '1');
pdfRenderer.BeginDocument(tessjs_pdf_title);
pdfRenderer.AddImage(api);

@@ -115,10 +128,10 @@ pdfRenderer.EndDocument();

const data = TessModule.FS.readFile(`/${pdf_name}.pdf`);
const data = TessModule.FS.readFile(`/${tessjs_pdf_name}.pdf`);
if (pdf_bin) {
if (tessjs_pdf_bin) {
files = { pdf: data, ...files };
}
if (pdf_auto_download) {
adapter.writeFile(`${pdf_name}.pdf`, data, 'application/pdf');
if (tessjs_pdf_auto_download) {
adapter.writeFile(`${tessjs_pdf_name}.pdf`, data, 'application/pdf');
}

@@ -214,20 +227,24 @@ }

.then(() => {
const progressUpdate = (progress) => {
res.progress({ status: 'initializing api', progress });
};
const params = {
...defaultParams,
...customParams,
};
progressUpdate(0);
handleParams(langs, params);
progressUpdate(0.5);
const ptr = setImage(image);
progressUpdate(1);
api.Recognize(null);
const files = handleOutput(params);
const result = dump(TessModule, api, params);
api.End();
TessModule._free(ptr);
res.resolve({ files, ...result });
try {
const progressUpdate = (progress) => {
res.progress({ status: 'initializing api', progress });
};
const params = {
...defaultParams,
...customParams,
};
progressUpdate(0);
handleParams(langs, params);
progressUpdate(0.5);
const ptr = setImage(image, params);
progressUpdate(1);
api.Recognize(null);
const files = handleOutput(params);
const result = dump(TessModule, api, params);
api.End();
TessModule._free(ptr);
res.resolve({ files, ...result });
} catch (err) {
res.reject({ err });
}
})

@@ -250,3 +267,3 @@ ))

const handleDetect = ({
image, langs, options,
image, langs, options, params: customParams,
}, res) => (

@@ -259,4 +276,8 @@ handleInit(options, res)

api.SetPageSegMode(TessModule.PSM_OSD_ONLY);
const params = {
...defaultParams,
...customParams,
};
const ptr = setImage(image);
const ptr = setImage(image, params);
const results = new TessModule.OSResults();

@@ -263,0 +284,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Packages

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc