ocr-document-classification
Advanced tools
Comparing version 1.1.2 to 1.1.3
@@ -110,12 +110,18 @@ "use strict"; | ||
if (file.type === "application/pdf") { | ||
console.log("Processing PDF file..."); | ||
text = await extractTextFromPDF(fileContent); | ||
console.log("Text extracted from PDF:", text); | ||
if (!text.trim()) { | ||
console.log("Extracted text is empty, using OCR..."); | ||
const blob = new Blob([fileContent], { type: "application/pdf" }); | ||
const imageDataUrl = URL.createObjectURL(blob); | ||
text = await ocrImage(imageDataUrl, onProgress); | ||
console.log("Text extracted using OCR:", text); | ||
} | ||
progress = 0.9; | ||
} else { | ||
console.log("Processing image file..."); | ||
const imageDataUrl = URL.createObjectURL(file); | ||
text = await ocrImage(imageDataUrl, onProgress); | ||
console.log("Text extracted from image using OCR:", text); | ||
progress = 0.9; | ||
@@ -136,5 +142,10 @@ } | ||
} catch (err) { | ||
console.error("Error processing document:", err); | ||
reject(err); | ||
} | ||
}; | ||
reader.onerror = (err) => { | ||
console.error("FileReader error:", err); | ||
reject(err); | ||
}; | ||
if (file.type === "application/pdf") { | ||
@@ -141,0 +152,0 @@ reader.readAsArrayBuffer(file); |
{ | ||
"name": "ocr-document-classification", | ||
"version": "1.1.2", | ||
"version": "1.1.3", | ||
"description": "Document classification using tesseract.js and string-similarity-js.", | ||
@@ -5,0 +5,0 @@ "main": "./dist/index.js", |
Sorry, the diff of this file is not supported yet.
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
16143
311