Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

ocr-document-classification

Package Overview
Dependencies
Maintainers
1
Versions
46
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

ocr-document-classification - npm Package Compare versions

Comparing version 1.0.9 to 1.0.10

33

dist/index.js

@@ -45,3 +45,6 @@ "use strict";

targetWords.forEach((targetWord) => {
const similarity = (0, import_string_similarity_js.stringSimilarity)(docWord.toLowerCase(), targetWord.toLowerCase());
const similarity = (0, import_string_similarity_js.stringSimilarity)(
docWord.toLowerCase(),
targetWord.toLowerCase()
);
if (similarity >= threshold) {

@@ -55,3 +58,3 @@ foundTargetWords.push(targetWord);

var defaultDocumentDictionary = {
BEVISPAFORSTEGANGSTJENESTE: [
"BEVIS P\xC5 F\xD8RSTEGANGSTJENESTE": [
["f\xF8rstegangstjeneste", "bevis", "avtjent"],

@@ -63,6 +66,9 @@ ["attest", "f\xF8rstegangstjeneste"],

KOMPETANSEBEVIS: [["omfatter", "oppl\xE6ring", "utdanningsprogram"]],
LEGEERKLERING: [["legeerkl\xE6ring", "f\xF8dselsnummer"]]
LEGEERKL\u00C6RING: [["legeerkl\xE6ring", "f\xF8dselsnummer"]]
};
async function classifyDocument(file, onProgress, customDocumentDictionary) {
const documentDictionary = { ...defaultDocumentDictionary, ...customDocumentDictionary };
const documentDictionary = {
...defaultDocumentDictionary,
...customDocumentDictionary
};
let progress = 0;

@@ -86,10 +92,17 @@ const worker = await Tesseract.createWorker(["noreng"], 1, {

if (onProgress) onProgress(progress);
const { data: { text } } = await worker.recognize(imageDataUrl, {
const {
data: { text }
} = await worker.recognize(imageDataUrl, {
rotateAuto: true
});
const targetWords = Array.from(new Set(Object.values(documentDictionary).flat(2)));
const targetWords = Array.from(
new Set(Object.values(documentDictionary).flat(2))
);
const targetWordsFound = findTargetWords(text, targetWords);
progress = 0.9;
if (onProgress) onProgress(progress);
const classification = determineClassification(targetWordsFound, documentDictionary);
const classification = determineClassification(
targetWordsFound,
documentDictionary
);
progress = 1;

@@ -106,3 +119,5 @@ if (onProgress) onProgress(progress);

function determineClassification(targetWordsFound, documentDictionary) {
for (const [classification, targetWordSets] of Object.entries(documentDictionary)) {
for (const [classification, targetWordSets] of Object.entries(
documentDictionary
)) {
for (const targetWords of targetWordSets) {

@@ -114,3 +129,3 @@ if (targetWords.every((word) => targetWordsFound.includes(word))) {

}
return "UNKNOWN";
return "UKJENT";
}

@@ -117,0 +132,0 @@ // Annotate the CommonJS export names for ESM import in node:

{
"name": "ocr-document-classification",
"version": "1.0.9",
"version": "1.0.10",
"description": "Document classification using tesseract.js and string-similarity-js.",

@@ -5,0 +5,0 @@ "main": "./dist/index.js",

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc