ocr-document-classification
Advanced tools
Comparing version 1.3.4 to 1.3.5
@@ -5,3 +5,8 @@ type documentDictionary = { | ||
declare function classifyDocument(file: File, onProgress?: (progress: number) => void, customDocumentDictionary?: documentDictionary, maxNumPages?: number): Promise<{ | ||
type ClassifyDocumentOptions = { | ||
onProgress?: (progress: number) => void; | ||
customDocumentDictionary?: documentDictionary; | ||
maxNumPages?: number; | ||
}; | ||
declare function classifyDocument(file: File, options?: ClassifyDocumentOptions): Promise<{ | ||
classification: string; | ||
@@ -8,0 +13,0 @@ text: string; |
@@ -84,3 +84,9 @@ // src/functions.ts | ||
}; | ||
async function classifyDocument(file, onProgress, customDocumentDictionary, maxNumPages = Infinity) { | ||
async function classifyDocument(file, options = {}) { | ||
const { | ||
onProgress, | ||
customDocumentDictionary, | ||
maxNumPages = Infinity | ||
// Default value if not provided | ||
} = options; | ||
const documentDictionary = { | ||
@@ -87,0 +93,0 @@ ...defaultDocumentDictionary, |
{ | ||
"name": "ocr-document-classification", | ||
"version": "1.3.4", | ||
"version": "1.3.5", | ||
"description": "Document classification using tesseract.js and string-similarity-js.", | ||
@@ -5,0 +5,0 @@ "main": "./dist/index.cjs", |
@@ -30,5 +30,7 @@ # OCR Document Classification | ||
- `file`: The image file (File object) of the document to be classified. | ||
- `onProgress` (optional): A callback function to receive progress updates. It accepts a number between 0 and 1. | ||
- `customDocumentDictionary` (optional): An object containing custom document types and their associated target words. | ||
- file: The image file (File object) of the document to be classified. | ||
- options (optional): An object containing the following optional properties: | ||
  - onProgress: A callback function to receive progress updates. It accepts a number between 0 and 100. | ||
  - customDocumentDictionary: An object containing custom document types and their associated target words. | ||
  - maxNumPages: A number specifying the maximum number of pages to process. Defaults to Infinity. | ||
@@ -43,3 +45,3 @@ #### Returns | ||
### Classes | ||
There exists a couple of default classes that can be useful to classify the most common documents. As you can see there exists multiple arrays for each key. This means that every word of **only ONE** of the arrays needs to be found in the document after OCR. You can also add your own class my creating a customDocumentDictionary | ||
There are a couple of default classes that can be useful for classifying the most common documents. As you can see, there are multiple arrays for each key. This means that every word of **only ONE** of the arrays needs to be found in the document after OCR. You can also add your own class by creating a customDocumentDictionary. | ||
```javascript | ||
@@ -68,57 +70,67 @@ const defaultDocumentDictionary: documentDictionary = { | ||
```jsx | ||
import React, { useState, useEffect } from "react"; | ||
import { classifyDocument } from "ocr-document-classification"; | ||
function UploadClassification() { | ||
const [documentFile, setDocumentFile] = useState<File | null>(null) | ||
const [classification, setClassification] = useState('') | ||
const [outputText, setOutputText] = useState('') | ||
const [progress, setProgress] = useState(0) | ||
const [documentFile, setDocumentFile] = useState<File | null>(null); | ||
const [classification, setClassification] = useState(""); | ||
const [outputText, setOutputText] = useState(""); | ||
const [progress, setProgress] = useState(0); | ||
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => { | ||
const file = event.target.files && event.target.files[0] | ||
setDocumentFile(file) | ||
} | ||
const file = event.target.files && event.target.files[0]; | ||
setDocumentFile(file); | ||
}; | ||
const customDocumentDictionary = { | ||
Jobbsøknad: [['søknad', 'stilling', 'ledig']], | ||
} | ||
Jobbsøknad: [["søknad", "stilling", "ledig"]], | ||
}; | ||
useEffect(() => { | ||
console.log('Progress: ', progress) | ||
}, [progress]) | ||
useEffect(() => { | ||
console.log("Progress: ", progress); | ||
}, [progress]); | ||
useEffect(() => { | ||
if (documentFile) { | ||
classifyDocument(documentFile, setProgress, customDocumentDictionary) | ||
classifyDocument(documentFile, { | ||
onProgress: setProgress, | ||
customDocumentDictionary: customDocumentDictionary, | ||
}) | ||
.then(({ classification, text }) => { | ||
setClassification(classification) | ||
setOutputText(text) | ||
setClassification(classification); | ||
setOutputText(text); | ||
}) | ||
.catch((err) => { | ||
console.error(err) | ||
setOutputText('Error during OCR processing') | ||
}) | ||
console.error(err); | ||
setOutputText("Error during OCR processing"); | ||
}); | ||
} | ||
resetOCR() | ||
}, [documentFile]) | ||
resetOCR(); | ||
}, [documentFile]); | ||
function resetOCR() { | ||
setClassification('') | ||
setOutputText('') | ||
setImageSrc('') | ||
setProgress(0) | ||
setClassification(""); | ||
setOutputText(""); | ||
setProgress(0); | ||
} | ||
return ( | ||
return ( | ||
<> | ||
<input | ||
<input | ||
accept="image/jpeg, image/png" | ||
type="file" | ||
onChange={handleFileChange} | ||
/> | ||
<div> | ||
/> | ||
<div> | ||
<h3>Resultat av OCR</h3> | ||
<p>{classification ? outputText : 'Laster inn ...'}</p> | ||
<p>{classification ? outputText : "Laster inn ..."}</p> | ||
<h1>{classification}</h1> | ||
</div> | ||
)} | ||
</div> | ||
</> | ||
); | ||
} | ||
export default UploadClassification; | ||
``` | ||
@@ -132,2 +144,3 @@ | ||
- `tesseract.js`: For performing OCR on the document image. | ||
- `pdfjs-dist`: For handling PDFs. | ||
@@ -134,0 +147,0 @@ ## LICENSE |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
39428
398
146