tesseract.js
Advanced tools
Comparing version 2.0.0-alpha.11 to 2.0.0-alpha.12
@@ -8,3 +8,4 @@ # Tesseract.js Examples | ||
- Offline version: https://github.com/jeromewu/tesseract.js-offline | ||
- With Vue (similar with React/Angular): https://github.com/jeromewu/tesseract-vue-app | ||
- With Vue: https://github.com/jeromewu/tesseract.js-vue-app | ||
- With Angular: https://github.com/jeromewu/tesseract.js-angular-app | ||
- Chrome Extension: https://github.com/jeromewu/tesseract.js-chrome-extension | ||
@@ -127,3 +128,3 @@ | ||
### with pdf output (^2.0.0-alpha.7) | ||
### with pdf output (^2.0.0-alpha.12) | ||
@@ -143,3 +144,3 @@ In this example, pdf file will be downloaded in browser and write to file system in Node.js | ||
{ | ||
'tessedit_create_pdf': '1', | ||
'tessjs_create_pdf': '1', | ||
} | ||
@@ -169,5 +170,5 @@ ) | ||
{ | ||
'tessedit_create_pdf': '1', | ||
'pdf_auto_download': false, // disable auto download | ||
'pdf_bin': true, // add pdf file bin array in result | ||
'tessjs_create_pdf': '1', | ||
'tessjs_pdf_auto_download': false, // disable auto download | ||
'tessjs_pdf_bin': true, // add pdf file bin array in result | ||
} | ||
@@ -204,1 +205,29 @@ ) | ||
``` | ||
### with only part of the image (^2.0.0-alpha.12) | ||
```javascript | ||
import Tesseract from 'tesseract.js'; | ||
const { TesseractWorker } = Tesseract; | ||
const worker = new TesseractWorker(); | ||
worker | ||
.recognize( | ||
'https://tesseract.projectnaptha.com/img/eng_bw.png', | ||
'eng', | ||
{ | ||
tessjs_image_rectangle_left: 0, | ||
tessjs_image_rectangle_top: 0, | ||
tessjs_image_rectangle_width: 500, | ||
tessjs_image_rectangle_height: 250, | ||
} | ||
) | ||
.progress((p) => { | ||
console.log('progress', p); | ||
}) | ||
.then(({ text }) => { | ||
console.log(text); | ||
worker.terminate(); | ||
}); | ||
``` |
@@ -27,12 +27,15 @@ Tesseract.js Parameters | ||
| tessedit\_char\_whitelist | string | '' | setting whitelist characters makes the result contain only these characters; useful when the content of the image is limited | | ||
| tessedit\_create\_pdf | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js generates a pdf output | | ||
| tessedit\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result | | ||
| tessedit\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result | | ||
| tessedit\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result | | ||
| tessedit\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result | | ||
| tessedit\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result | | ||
| pdf\_name | string | 'tesseract.js-ocr-result' | the name of the generated pdf file | | ||
| pdf\_title | string | 'Tesseract.js OCR Result' | the title of the generated pdf file | | ||
| pdf\_auto\_download | boolean | true | If the value is true, tesseract.js will automatic download/writeFile pdf file | | ||
| pdf\_bin | boolean | false | whether to include pdf binary array in the result object (result.files.pdf) | | ||
| tessjs\_create\_pdf | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js generates a pdf output | | ||
| tessjs\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result | | ||
| tessjs\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result | | ||
| tessjs\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result | | ||
| tessjs\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result | | ||
| tessjs\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result | | ||
| tessjs\_pdf\_name | string | 'tesseract.js-ocr-result' | the name of the generated pdf file | | ||
| tessjs\_pdf\_title | string | 'Tesseract.js OCR Result' | the title of the generated pdf file | | ||
| tessjs\_pdf\_auto\_download | boolean | true | If the value is true, tesseract.js will automatically download the pdf file (browser) or write it to the file system (Node.js) | | ||
| tessjs\_pdf\_bin | boolean | false | whether to include pdf binary array in the result object (result.files.pdf) | | ||
| tessjs\_image\_rectangle\_left | number | 0 | The left of the sub-rectangle of the image. | | ||
| tessjs\_image\_rectangle\_top | number | 0 | The top of the sub-rectangle of the image. | | ||
| tessjs\_image\_rectangle\_width | number | -1 | The width of the sub-rectangle of the image; -1 (any negative value) means the full image width is used | | ||
| tessjs\_image\_rectangle\_height | number | -1 | The height of the sub-rectangle of the image; -1 (any negative value) means the full image height is used | |
{ | ||
"name": "tesseract.js", | ||
"version": "2.0.0-alpha.11", | ||
"version": "2.0.0-alpha.12", | ||
"description": "Pure Javascript Multilingual OCR", | ||
@@ -19,3 +19,4 @@ "main": "src/index.js", | ||
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html", | ||
"lint": "eslint src" | ||
"lint": "eslint src", | ||
"postinstall": "opencollective-postinstall || true" | ||
}, | ||
@@ -58,2 +59,3 @@ "browser": { | ||
"node-fetch": "^2.3.0", | ||
"opencollective-postinstall": "^2.0.2", | ||
"resolve-url": "^0.2.1", | ||
@@ -70,3 +72,7 @@ "tesseract.js-core": "^2.0.0-beta.10", | ||
}, | ||
"homepage": "https://github.com/naptha/tesseract.js" | ||
"homepage": "https://github.com/naptha/tesseract.js", | ||
"collective": { | ||
"type": "opencollective", | ||
"url": "https://opencollective.com/tesseractjs" | ||
} | ||
} |
@@ -6,3 +6,3 @@ <p align="center"> | ||
[![Build Status](https://travis-ci.org/naptha/tesseract.js.svg?branch=master)](https://travis-ci.org/naptha/tesseract.js) | ||
[![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) | ||
[![Financial Contributors on Open Collective](https://opencollective.com/tesseractjs/all/badge.svg?label=financial+contributors)](https://opencollective.com/tesseractjs) [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) | ||
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/naptha/tesseract.js/graphs/commit-activity) | ||
@@ -26,3 +26,3 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | ||
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine. | ||
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [#CDN](CDN) and on the server with [Node.js](https://nodejs.org/en/). | ||
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [CDN](#CDN) and on the server with [Node.js](https://nodejs.org/en/). | ||
After you [install it](#installation), using it is as simple as: | ||
@@ -49,2 +49,3 @@ | ||
- Supported image formats: png, jpg, bmp, pbm | ||
- Supports WebAssembly (falls back to ASM.js when the browser doesn't support it) | ||
@@ -110,2 +111,6 @@ | ||
You can also run the development server in Gitpod (a free online IDE and dev environment for GitHub that will automate your dev setup) with a single click. | ||
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/naptha/tesseract.js/blob/master/examples/browser/demo.html) | ||
### Building Static Files | ||
@@ -117,1 +122,31 @@ To build the compiled static files just execute the following: | ||
This will output the files into the `dist` directory. | ||
## Contributors | ||
### Code Contributors | ||
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)]. | ||
<a href="https://github.com/naptha/tesseract.js/graphs/contributors"><img src="https://opencollective.com/tesseractjs/contributors.svg?width=890&button=false" /></a> | ||
### Financial Contributors | ||
Become a financial contributor and help us sustain our community. [[Contribute](https://opencollective.com/tesseractjs/contribute)] | ||
#### Individuals | ||
<a href="https://opencollective.com/tesseractjs"><img src="https://opencollective.com/tesseractjs/individuals.svg?width=890"></a> | ||
#### Organizations | ||
Support this project with your organization. Your logo will show up here with a link to your website. [[Contribute](https://opencollective.com/tesseractjs/contribute)] | ||
<a href="https://opencollective.com/tesseractjs/organization/0/website"><img src="https://opencollective.com/tesseractjs/organization/0/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/1/website"><img src="https://opencollective.com/tesseractjs/organization/1/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/2/website"><img src="https://opencollective.com/tesseractjs/organization/2/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/3/website"><img src="https://opencollective.com/tesseractjs/organization/3/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/4/website"><img src="https://opencollective.com/tesseractjs/organization/4/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/5/website"><img src="https://opencollective.com/tesseractjs/organization/5/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/6/website"><img src="https://opencollective.com/tesseractjs/organization/6/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/7/website"><img src="https://opencollective.com/tesseractjs/organization/7/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/8/website"><img src="https://opencollective.com/tesseractjs/organization/8/avatar.svg"></a> | ||
<a href="https://opencollective.com/tesseractjs/organization/9/website"><img src="https://opencollective.com/tesseractjs/organization/9/avatar.svg"></a> |
@@ -23,2 +23,5 @@ const path = require('path'); | ||
], | ||
devServer: { | ||
allowedHosts: ['localhost', '.gitpod.io'], | ||
}, | ||
}); | ||
@@ -25,0 +28,0 @@ |
@@ -46,7 +46,7 @@ /** | ||
module.exports = (TessModule, api, { | ||
tessedit_create_hocr, | ||
tessedit_create_tsv, | ||
tessedit_create_box, | ||
tessedit_create_unlv, | ||
tessedit_create_osd, | ||
tessjs_create_hocr, | ||
tessjs_create_tsv, | ||
tessjs_create_box, | ||
tessjs_create_unlv, | ||
tessjs_create_osd, | ||
}) => { | ||
@@ -187,7 +187,7 @@ const ri = api.GetIterator(); | ||
text: api.GetUTF8Text(), | ||
hocr: tessedit_create_hocr === '1' ? deindent(api.GetHOCRText()) : null, | ||
tsv: tessedit_create_tsv === '1' ? api.GetTSVText() : null, | ||
box: tessedit_create_box === '1' ? api.GetBoxText() : null, | ||
unlv: tessedit_create_unlv === '1' ? api.GetUNLVText() : null, | ||
osd: tessedit_create_osd === '1' ? api.GetOsdText() : null, | ||
hocr: tessjs_create_hocr === '1' ? deindent(api.GetHOCRText()) : null, | ||
tsv: tessjs_create_tsv === '1' ? api.GetTSVText() : null, | ||
box: tessjs_create_box === '1' ? api.GetBoxText() : null, | ||
unlv: tessjs_create_unlv === '1' ? api.GetUNLVText() : null, | ||
osd: tessjs_create_osd === '1' ? api.GetOsdText() : null, | ||
confidence: api.MeanTextConf(), | ||
@@ -194,0 +194,0 @@ blocks, |
@@ -19,14 +19,18 @@ const { OEM, PSM } = require('./types'); | ||
tessedit_char_whiltelist: '', | ||
tessedit_create_pdf: '0', | ||
tessedit_create_hocr: '1', | ||
tessedit_create_tsv: '1', | ||
tessedit_create_box: '0', | ||
tessedit_create_unlv: '0', | ||
tessedit_create_osd: '0', | ||
textonly_pdf: '0', | ||
pdf_name: 'tesseract.js-ocr-result', | ||
pdf_title: 'Tesseract.js OCR Result', | ||
pdf_auto_download: true, | ||
pdf_bin: false, | ||
tessjs_create_pdf: '0', | ||
tessjs_create_hocr: '1', | ||
tessjs_create_tsv: '1', | ||
tessjs_create_box: '0', | ||
tessjs_create_unlv: '0', | ||
tessjs_create_osd: '0', | ||
tessjs_textonly_pdf: '0', | ||
tessjs_pdf_name: 'tesseract.js-ocr-result', | ||
tessjs_pdf_title: 'Tesseract.js OCR Result', | ||
tessjs_pdf_auto_download: true, | ||
tessjs_pdf_bin: false, | ||
tessjs_image_rectangle_left: 0, | ||
tessjs_image_rectangle_top: 0, | ||
tessjs_image_rectangle_width: -1, | ||
tessjs_image_rectangle_height: -1, | ||
}, | ||
}; |
@@ -51,10 +51,10 @@ /** | ||
then(resolve, reject) { | ||
if (this._resolve.push) { | ||
this._resolve.push(resolve); | ||
} else { | ||
resolve(this._resolve); | ||
} | ||
if (reject) this.catch(reject); | ||
return this; | ||
return new Promise((res, rej) => { | ||
if (!this._resolve.push) { | ||
res(this._result); | ||
} else { | ||
this._resolve.push(res); | ||
} | ||
this.catch(rej); | ||
}).then(resolve, reject); | ||
} | ||
@@ -61,0 +61,0 @@ |
@@ -35,4 +35,10 @@ /** | ||
*/ | ||
const setImage = (image) => { | ||
const setImage = (image, params) => { | ||
const { | ||
tessjs_image_rectangle_left: left, | ||
tessjs_image_rectangle_top: top, | ||
tessjs_image_rectangle_width: width, | ||
tessjs_image_rectangle_height: height, | ||
} = params; | ||
const { | ||
w, h, bytesPerPixel, data, pix, | ||
@@ -52,3 +58,8 @@ } = readImage(TessModule, Array.from(image)); | ||
} | ||
api.SetRectangle(0, 0, w, h); | ||
api.SetRectangle( | ||
(left < 0) ? 0 : left, | ||
(top < 0) ? 0 : top, | ||
(width < 0) ? w : width, | ||
(height < 0) ? h : height, | ||
); | ||
return data === null ? pix : data; | ||
@@ -79,3 +90,5 @@ }; | ||
Object.keys(params).forEach((key) => { | ||
api.SetVariable(key, params[key]); | ||
if (!key.startsWith('tessjs')) { | ||
api.SetVariable(key, params[key]); | ||
} | ||
}); | ||
@@ -95,8 +108,8 @@ }; | ||
const { | ||
tessedit_create_pdf, | ||
textonly_pdf, | ||
pdf_name, | ||
pdf_title, | ||
pdf_auto_download, | ||
pdf_bin, | ||
tessjs_create_pdf, | ||
tessjs_textonly_pdf, | ||
tessjs_pdf_name, | ||
tessjs_pdf_title, | ||
tessjs_pdf_auto_download, | ||
tessjs_pdf_bin, | ||
} = { | ||
@@ -107,5 +120,5 @@ ...defaultParams, | ||
if (tessedit_create_pdf === '1') { | ||
const pdfRenderer = new TessModule.TessPDFRenderer(pdf_name, '/', textonly_pdf === '1'); | ||
pdfRenderer.BeginDocument(pdf_title); | ||
if (tessjs_create_pdf === '1') { | ||
const pdfRenderer = new TessModule.TessPDFRenderer(tessjs_pdf_name, '/', tessjs_textonly_pdf === '1'); | ||
pdfRenderer.BeginDocument(tessjs_pdf_title); | ||
pdfRenderer.AddImage(api); | ||
@@ -115,10 +128,10 @@ pdfRenderer.EndDocument(); | ||
const data = TessModule.FS.readFile(`/${pdf_name}.pdf`); | ||
const data = TessModule.FS.readFile(`/${tessjs_pdf_name}.pdf`); | ||
if (pdf_bin) { | ||
if (tessjs_pdf_bin) { | ||
files = { pdf: data, ...files }; | ||
} | ||
if (pdf_auto_download) { | ||
adapter.writeFile(`${pdf_name}.pdf`, data, 'application/pdf'); | ||
if (tessjs_pdf_auto_download) { | ||
adapter.writeFile(`${tessjs_pdf_name}.pdf`, data, 'application/pdf'); | ||
} | ||
@@ -214,20 +227,24 @@ } | ||
.then(() => { | ||
const progressUpdate = (progress) => { | ||
res.progress({ status: 'initializing api', progress }); | ||
}; | ||
const params = { | ||
...defaultParams, | ||
...customParams, | ||
}; | ||
progressUpdate(0); | ||
handleParams(langs, params); | ||
progressUpdate(0.5); | ||
const ptr = setImage(image); | ||
progressUpdate(1); | ||
api.Recognize(null); | ||
const files = handleOutput(params); | ||
const result = dump(TessModule, api, params); | ||
api.End(); | ||
TessModule._free(ptr); | ||
res.resolve({ files, ...result }); | ||
try { | ||
const progressUpdate = (progress) => { | ||
res.progress({ status: 'initializing api', progress }); | ||
}; | ||
const params = { | ||
...defaultParams, | ||
...customParams, | ||
}; | ||
progressUpdate(0); | ||
handleParams(langs, params); | ||
progressUpdate(0.5); | ||
const ptr = setImage(image, params); | ||
progressUpdate(1); | ||
api.Recognize(null); | ||
const files = handleOutput(params); | ||
const result = dump(TessModule, api, params); | ||
api.End(); | ||
TessModule._free(ptr); | ||
res.resolve({ files, ...result }); | ||
} catch (err) { | ||
res.reject({ err }); | ||
} | ||
}) | ||
@@ -250,3 +267,3 @@ )) | ||
const handleDetect = ({ | ||
image, langs, options, | ||
image, langs, options, params: customParams, | ||
}, res) => ( | ||
@@ -259,4 +276,8 @@ handleInit(options, res) | ||
api.SetPageSegMode(TessModule.PSM_OSD_ONLY); | ||
const params = { | ||
...defaultParams, | ||
...customParams, | ||
}; | ||
const ptr = setImage(image); | ||
const ptr = setImage(image, params); | ||
const results = new TessModule.OSResults(); | ||
@@ -263,0 +284,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Install scripts
Supply chain riskInstall scripts are run when the package is installed. The majority of malware in npm is hidden in install scripts.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
2055901
70
2361
148
8
1
+ Addedopencollective-postinstall@2.0.3(transitive)