Socket
Socket
Sign in · Demo · Install

tesseract.js

Package Overview
Dependencies
Maintainers
4
Versions
68
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tesseract.js - npm Package Compare versions

Comparing version 4.0.6 to 4.1.0

4

package.json
{
"name": "tesseract.js",
"version": "4.0.6",
"version": "4.1.0",
"description": "Pure Javascript Multilingual OCR",

@@ -66,3 +66,2 @@ "main": "src/index.js",

"bmp-js": "^0.1.0",
"file-type": "^12.4.2",
"idb-keyval": "^6.2.0",

@@ -74,3 +73,2 @@ "is-electron": "^2.2.2",

"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^4.0.4",

@@ -77,0 +75,0 @@ "wasm-feature-detect": "^1.2.11",

@@ -73,2 +73,3 @@ declare namespace Tesseract {

blocks: boolean;
layoutBlocks: boolean;
hocr: boolean;

@@ -149,3 +150,3 @@ tsv: boolean;

type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer;
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer | OffscreenCanvas;
interface Block {

@@ -152,0 +153,0 @@ paragraphs: Paragraph[];

const isBrowser = require('./getEnvironment')('type') === 'browser';
const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disable-line
const resolveURL = isBrowser ? s => (new URL(s, window.location.href)).href : s => s; // eslint-disable-line
module.exports = (options) => {

@@ -5,0 +6,0 @@ const opts = { ...options };

@@ -8,2 +8,3 @@ /*

blocks: true,
layoutBlocks: false,
hocr: true,

@@ -18,2 +19,3 @@ tsv: true,

imageBinary: false,
debug: false,
};

@@ -11,6 +11,5 @@ /**

require('regenerator-runtime/runtime');
const fileType = require('file-type');
const isURL = require('is-url');
const dump = require('./utils/dump');
const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker';
const env = require('../utils/getEnvironment')('type');
const setImage = require('./utils/setImage');

@@ -93,2 +92,4 @@ const defaultParams = require('./constants/defaultParams');

// Check for existing .traineddata file in cache
// This automatically fails if cacheMethod is set to 'refresh' or 'none'
try {

@@ -103,2 +104,3 @@ const _data = await readCache(`${cachePath || '.'}/${lang}.traineddata`);

}
// Attempt to fetch new .traineddata file
} catch (e) {

@@ -110,9 +112,13 @@ newData = true;

if (isURL(langPath) || langPath.startsWith('moz-extension://') || langPath.startsWith('chrome-extension://') || langPath.startsWith('file://')) { /** When langPath is an URL */
// For Node.js, langPath may be a URL or local file path
// The is-url package is used to tell the difference
// For the browser version, langPath is assumed to be a URL
if (env !== 'node' || isURL(langPath) || langPath.startsWith('moz-extension://') || langPath.startsWith('chrome-extension://') || langPath.startsWith('file://')) { /** When langPath is an URL */
path = langPath.replace(/\/$/, '');
}
// langPath is a URL, fetch from server
if (path !== null) {
const fetchUrl = `${path}/${lang}.traineddata${gzip ? '.gz' : ''}`;
const resp = await (isWebWorker ? fetch : adapter.fetch)(fetchUrl);
const resp = await (env === 'webworker' ? fetch : adapter.fetch)(fetchUrl);
if (!resp.ok) {

@@ -122,2 +128,5 @@ throw Error(`Network error while fetching ${fetchUrl}. Response code: ${resp.status}`);

data = await resp.arrayBuffer();
// langPath is a local file, read .traineddata from local filesystem
// (adapter.readCache is a generic file read function in Node.js version)
} else {

@@ -133,4 +142,6 @@ data = await adapter.readCache(`${langPath}/${lang}.traineddata${gzip ? '.gz' : ''}`);

const type = fileType(data);
if (typeof type !== 'undefined' && type.mime === 'application/gzip') {
// Check for gzip magic numbers (1F and 8B in hex)
const isGzip = (data[0] === 31 && data[1] === 139) || (data[1] === 31 && data[0] === 139);
if (isGzip) {
data = adapter.gunzip(data);

@@ -264,3 +275,3 @@ }

const nonRecOutputs = ['imageColor', 'imageGrey', 'imageBinary'];
const nonRecOutputs = ['imageColor', 'imageGrey', 'imageBinary', 'layoutBlocks'];
let recOutputCount = 0;

@@ -277,3 +288,4 @@ for (const prop of Object.keys(output)) {

}
return { workingOutput, recOutputCount };
const skipRecognition = recOutputCount === 0;
return { workingOutput, skipRecognition };
};

@@ -313,3 +325,3 @@

const { workingOutput, recOutputCount } = processOutput(output);
const { workingOutput, skipRecognition } = processOutput(output);

@@ -364,5 +376,8 @@ // When the auto-rotate option is True, setImage is called with no angle,

if (recOutputCount > 0) {
if (!skipRecognition) {
api.Recognize(null);
} else {
if (output.layoutBlocks) {
api.AnalyseLayout();
}
log('Skipping recognition: all output options requiring recognition are disabled.');

@@ -372,3 +387,3 @@ }

const { pdfTextOnly } = options;
const result = dump(TessModule, api, workingOutput, { pdfTitle, pdfTextOnly });
const result = dump(TessModule, api, workingOutput, { pdfTitle, pdfTextOnly, skipRecognition });
result.rotateRadians = rotateRadiansFinal;

@@ -375,0 +390,0 @@

@@ -82,3 +82,6 @@ /**

if (output.blocks) {
// If output.layoutBlocks is true and options.skipRecognition is true,
// the user wants layout data but text recognition has not been run.
// In this case, fields that require text recognition are skipped.
if (output.blocks || output.layoutBlocks) {
ri.Begin();

@@ -106,4 +109,4 @@ do {

paragraphs: [],
text: ri.GetUTF8Text(RIL_BLOCK),
confidence: ri.Confidence(RIL_BLOCK),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_BLOCK) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_BLOCK) : null,
baseline: ri.getBaseline(RIL_BLOCK),

@@ -119,4 +122,4 @@ bbox: ri.getBoundingBox(RIL_BLOCK),

lines: [],
text: ri.GetUTF8Text(RIL_PARA),
confidence: ri.Confidence(RIL_PARA),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_PARA) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_PARA) : null,
baseline: ri.getBaseline(RIL_PARA),

@@ -131,4 +134,4 @@ bbox: ri.getBoundingBox(RIL_PARA),

words: [],
text: ri.GetUTF8Text(RIL_TEXTLINE),
confidence: ri.Confidence(RIL_TEXTLINE),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_TEXTLINE) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_TEXTLINE) : null,
baseline: ri.getBaseline(RIL_TEXTLINE),

@@ -146,4 +149,4 @@ bbox: ri.getBoundingBox(RIL_TEXTLINE),

text: ri.GetUTF8Text(RIL_WORD),
confidence: ri.Confidence(RIL_WORD),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_WORD) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_WORD) : null,
baseline: ri.getBaseline(RIL_WORD),

@@ -170,4 +173,4 @@ bbox: ri.getBoundingBox(RIL_WORD),

word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence(),
text: !options.skipRecognition ? wc.GetUTF8Text() : null,
confidence: !options.skipRecognition ? wc.Confidence() : null,
});

@@ -188,4 +191,4 @@ } while (wc.Next());

image: null,
text: ri.GetUTF8Text(RIL_SYMBOL),
confidence: ri.Confidence(RIL_SYMBOL),
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_SYMBOL) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_SYMBOL) : null,
baseline: ri.getBaseline(RIL_SYMBOL),

@@ -201,4 +204,4 @@ bbox: ri.getBoundingBox(RIL_SYMBOL),

symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence(),
text: !options.skipRecognition ? ci.GetUTF8Text() : null,
confidence: !options.skipRecognition ? ci.Confidence() : null,
});

@@ -223,4 +226,5 @@ } while (ci.Next());

imageBinary: output.imageBinary ? getImage(imageType.BINARY) : null,
confidence: api.MeanTextConf(),
blocks: output.blocks ? blocks : null,
confidence: !options.skipRecognition ? api.MeanTextConf() : null,
blocks: output.blocks && !options.skipRecognition ? blocks : null,
layoutBlocks: output.layoutBlocks && options.skipRecognition ? blocks : null,
psm: enumToString(api.GetPageSegMode(), 'PSM'),

@@ -227,0 +231,0 @@ oem: enumToString(api.oem(), 'OEM'),

const bmp = require('bmp-js');
const fileType = require('file-type');

@@ -12,3 +11,4 @@ /**

module.exports = (TessModule, api, image, angle = 0) => {
const type = fileType(image);
// Check for bmp magic numbers (42 and 4D in hex)
const isBmp = (image[0] === 66 && image[1] === 77) || (image[1] === 66 && image[0] === 77);

@@ -22,3 +22,3 @@ const exif = image.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;

// */
if (type && type.mime === 'image/bmp') {
if (isBmp) {
// Not sure what this line actually does, but removing breaks the function

@@ -25,0 +25,0 @@ const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));

@@ -1,2 +0,2 @@

const resolveURL = require('resolve-url');
const resolveURL = (s) => (new URL(s, window.location.href)).href;
const { version } = require('../../../package.json');

@@ -3,0 +3,0 @@ const defaultOptions = require('../../constants/defaultOptions');

@@ -1,3 +0,1 @@

const resolveURL = require('resolve-url');
/**

@@ -43,6 +41,6 @@ * readFromBlobOrFile

} else {
const resp = await fetch(resolveURL(image));
const resp = await fetch(image);
data = await resp.arrayBuffer();
}
} else if (image instanceof HTMLElement) {
} else if (typeof HTMLElement !== 'undefined' && image instanceof HTMLElement) {
if (image.tagName === 'IMG') {

@@ -62,2 +60,5 @@ data = await loadImage(image.src);

}
} else if (typeof OffscreenCanvas !== 'undefined' && image instanceof OffscreenCanvas) {
const blob = await image.convertToBlob();
data = await readFromBlobOrFile(blob);
} else if (image instanceof File || image instanceof Blob) {

@@ -64,0 +65,0 @@ data = await readFromBlobOrFile(image);

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc