ocr-document-classification
Advanced tools
Comparing version 1.3.1 to 1.3.2
@@ -8,10 +8,10 @@ // src/functions.ts | ||
pdfjsLib.GlobalWorkerOptions.workerSrc = "https://cdn.jsdelivr.net/npm/pdfjs-dist@4.4.168/build/pdf.worker.min.mjs"; | ||
async function convert(pdfPath) { | ||
async function convert(pdfPath, maxNumPages) { | ||
try { | ||
const loadingTask = pdfjsLib.getDocument(pdfPath); | ||
const doc = await loadingTask.promise; | ||
const numPages = doc.numPages; | ||
const totalPages = Math.min(pdf.numPages, maxNumPages); | ||
let blobArray = []; | ||
console.log(`PDF loaded with ${numPages} pages`); | ||
for (let i = 1; i <= numPages; i++) { | ||
for (let i = 1; i <= totalPages; i++) { | ||
const page = await doc.getPage(i); | ||
@@ -89,3 +89,3 @@ const viewport = page.getViewport({ scale: 1.5 }); | ||
const pdfPath = URL.createObjectURL(file); | ||
BlobArray = await convert(pdfPath); | ||
BlobArray = await convert(pdfPath, maxNumPages); | ||
console.log("Blob to read set by convert function"); | ||
@@ -98,3 +98,3 @@ } else if (file.type.startsWith("image/")) { | ||
let progress = 0; | ||
const totalBlobs = BlobArray.length; | ||
const totalBlobs = Math.min(BlobArray.length, maxNumPages); | ||
let fullText = ""; | ||
@@ -106,5 +106,2 @@ const targetWords = Array.from( | ||
for (let i = 0; i < totalBlobs; i++) { | ||
if (i >= maxNumPages - 1) { | ||
break; | ||
} | ||
const text = await ocrBlob(BlobArray[i]); | ||
@@ -111,0 +108,0 @@ fullText += text; |
{ | ||
"name": "ocr-document-classification", | ||
"version": "1.3.1", | ||
"version": "1.3.2", | ||
"description": "Document classification using tesseract.js and string-similarity-js.", | ||
@@ -5,0 +5,0 @@ "main": "./dist/index.cjs", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
37617
373