ocr-document-classification
Advanced tools
Comparing version 1.3.0 to 1.3.1
@@ -5,3 +5,3 @@ type documentDictionary = { | ||
declare function classifyDocument(file: File, onProgress?: (progress: number) => void, customDocumentDictionary?: documentDictionary): Promise<{ | ||
declare function classifyDocument(file: File, onProgress?: (progress: number) => void, customDocumentDictionary?: documentDictionary, maxNumPages?: number): Promise<{ | ||
classification: string; | ||
@@ -8,0 +8,0 @@ text: string; |
@@ -80,3 +80,3 @@ // src/functions.ts | ||
}; | ||
async function classifyDocument(file, onProgress, customDocumentDictionary) { | ||
async function classifyDocument(file, onProgress, customDocumentDictionary, maxNumPages = Infinity) { | ||
const documentDictionary = { | ||
@@ -99,19 +99,29 @@ ...defaultDocumentDictionary, | ||
let fullText = ""; | ||
for (let i = 0; i < totalBlobs; i++) { | ||
const text = await ocrBlob(BlobArray[i]); | ||
fullText += text + "\n"; | ||
progress = Math.round((i + 1) / totalBlobs * 100); | ||
if (onProgress) { | ||
onProgress(progress); | ||
} | ||
} | ||
const targetWords = Array.from( | ||
new Set(Object.values(documentDictionary).flat(2)) | ||
); | ||
const targetWordsFound = findTargetWords(fullText, targetWords); | ||
const classification = determineClassification( | ||
targetWordsFound, | ||
documentDictionary | ||
); | ||
return { classification, text: fullText }; | ||
try { | ||
for (let i = 0; i < totalBlobs; i++) { | ||
if (i >= maxNumPages - 1) { | ||
break; | ||
} | ||
const text = await ocrBlob(BlobArray[i]); | ||
fullText += text; | ||
progress = Math.round((i + 1) / totalBlobs * 100); | ||
if (onProgress) { | ||
onProgress(progress); | ||
} | ||
const targetWordsFound = findTargetWords(text, targetWords); | ||
const classification = determineClassification( | ||
targetWordsFound, | ||
documentDictionary | ||
); | ||
if (classification !== "UKJENT") { | ||
return { classification, text: fullText }; | ||
} | ||
} | ||
} catch (error) { | ||
console.log("Error in ocr process: ", error); | ||
} | ||
return { classification: "UKJENT", text: fullText }; | ||
} | ||
@@ -137,3 +147,2 @@ function determineClassification(targetWordsFound, documentDictionary) { | ||
} | ||
console.log(m); | ||
} | ||
@@ -140,0 +149,0 @@ }); |
{ | ||
"name": "ocr-document-classification", | ||
"version": "1.3.0", | ||
"version": "1.3.1", | ||
"description": "Document classification using tesseract.js and string-similarity-js.", | ||
@@ -5,0 +5,0 @@ "main": "./dist/index.cjs", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
37657
379