Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

tesseract.js

Package Overview
Dependencies
Maintainers
3
Versions
68
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tesseract.js - npm Package Compare versions

Comparing version 2.0.0-alpha.9 to 2.0.0-alpha.10

26

docs/faq.md

@@ -15,1 +15,27 @@ FAQ

For tesseract.js v1, check [Training Tesseract 3.03–3.05](https://github.com/tesseract-ocr/tesseract/wiki/Training-Tesseract-3.03%E2%80%933.05)
## How can I get HOCR, TSV, Box, UNLV, OSD?
Starting from 2.0.0-alpha.10, you can get all of this information in the final result.
```javascript
import Tesseract from 'tesseract.js';
const { TesseractWorker } = Tesseract;
const worker = new TesseractWorker();
worker
.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', {
tessedit_create_box: '1',
tessedit_create_unlv: '1',
tessedit_create_osd: '1',
})
.then((result) => {
console.log(result.text);
console.log(result.hocr);
console.log(result.tsv);
console.log(result.box);
console.log(result.unlv);
console.log(result.osd);
});
```

8

docs/local-installation.md
## Local Installation
Check here for an example: https://github.com/jeromewu/tesseract.js-offline
Check here for examples: https://github.com/naptha/tesseract.js/blob/master/docs/examples.md

@@ -13,3 +13,3 @@ In browser environment, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN.

const worker = Tesseract.TesseractWorker({
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0-alpha.8/dist/worker.min.js',
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0-alpha.10/dist/worker.min.js',
langPath: 'https://tessdata.projectnaptha.com/4.0.0',

@@ -27,4 +27,4 @@ corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.wasm.js',

### corePath
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available).
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available).
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.js' which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.8/tesseract-core.wasm'. But it fails to fetch at this moment.
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.js', which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.wasm'. However, it currently fails to fetch.

@@ -27,3 +27,8 @@ Tesseract.js Parameters

| tessedit\_char\_whitelist | string | '' | setting whitelist characters makes the result contain only these characters; useful when the content of the image is limited |
| tessedit\_create\_pdf | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js will generate a pdf output |
| tessedit\_create\_pdf | string | '0' | only 2 values, '0' or '1'; when the value is '1', tesseract.js generates a pdf output |
| tessedit\_create\_hocr | string | '1' | only 2 values, '0' or '1'; when the value is '1', tesseract.js includes hocr in the result |
| tessedit\_create\_tsv | string | '1' | only 2 values, '0' or '1'; when the value is '1', tesseract.js includes tsv in the result |
| tessedit\_create\_box | string | '0' | only 2 values, '0' or '1'; when the value is '1', tesseract.js includes box in the result |
| tessedit\_create\_unlv | string | '0' | only 2 values, '0' or '1'; when the value is '1', tesseract.js includes unlv in the result |
| tessedit\_create\_osd | string | '0' | only 2 values, '0' or '1'; when the value is '1', tesseract.js includes osd in the result |
| pdf\_name | string | 'tesseract.js-ocr-result' | the name of the generated pdf file |

@@ -30,0 +35,0 @@ | pdf\_title | string | 'Tesseract.js OCR Result' | the title of the generated pdf file |

{
"name": "tesseract.js",
"version": "2.0.0-alpha.9",
"version": "2.0.0-alpha.10",
"description": "Pure Javascript Multilingual OCR",

@@ -5,0 +5,0 @@ "main": "src/index.js",

@@ -42,3 +42,3 @@ # [Tesseract.js](http://tesseract.projectnaptha.com/)

```html
<script src='https://unpkg.com/tesseract.js@v2.0.0-alpha.8/dist/tesseract.min.js'></script>
<script src='https://unpkg.com/tesseract.js@v2.0.0-alpha.10/dist/tesseract.min.js'></script>
```

@@ -45,0 +45,0 @@

@@ -45,3 +45,9 @@ /**

*/
module.exports = (TessModule, api) => {
module.exports = (TessModule, api, {
tessedit_create_hocr,
tessedit_create_tsv,
tessedit_create_box,
tessedit_create_unlv,
tessedit_create_osd,
}) => {
const ri = api.GetIterator();

@@ -181,3 +187,7 @@ const blocks = [];

text: api.GetUTF8Text(),
html: deindent(api.GetHOCRText()),
hocr: tessedit_create_hocr === '1' ? deindent(api.GetHOCRText()) : null,
tsv: tessedit_create_tsv === '1' ? api.GetTSVText() : null,
box: tessedit_create_box === '1' ? api.GetBoxText() : null,
unlv: tessedit_create_unlv === '1' ? api.GetUNLVText() : null,
osd: tessedit_create_osd === '1' ? api.GetOsdText() : null,
confidence: api.MeanTextConf(),

@@ -184,0 +194,0 @@ blocks,

@@ -20,2 +20,7 @@ const { OEM, PSM } = require('./types');

tessedit_create_pdf: '0',
tessedit_create_hocr: '1',
tessedit_create_tsv: '1',
tessedit_create_box: '0',
tessedit_create_unlv: '0',
tessedit_create_osd: '0',
textonly_pdf: '0',

@@ -22,0 +27,0 @@ pdf_name: 'tesseract.js-ocr-result',

@@ -70,10 +70,7 @@ /**

*/
const handleParams = (langs, customParams) => {
const handleParams = (langs, iParams) => {
const {
tessedit_ocr_engine_mode,
...params
} = {
...defaultParams,
...customParams,
};
} = iParams;
api.Init(null, getLangsStr(langs), tessedit_ocr_engine_mode);

@@ -195,3 +192,3 @@ Object.keys(params).forEach((key) => {

const handleRecognize = ({
image, langs, options, params,
image, langs, options, params: customParams,
}, res) => (

@@ -216,2 +213,6 @@ handleInit(options, res)

};
const params = {
...defaultParams,
...customParams,
};
progressUpdate(0);

@@ -224,3 +225,3 @@ handleParams(langs, params);

const files = handleOutput(params);
const result = dump(TessModule, api);
const result = dump(TessModule, api, params);
api.End();

@@ -227,0 +228,0 @@ TessModule._free(ptr);

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc