ocr-document-classification - npm Package Compare versions

Comparing version 1.0.9 to 1.0.10

dist/index.js

		@@ -45,3 +45,6 @@ "use strict";
		targetWords.forEach((targetWord) => {
		- const similarity = (0, import_string_similarity_js.stringSimilarity)(docWord.toLowerCase(), targetWord.toLowerCase());
		+ const similarity = (0, import_string_similarity_js.stringSimilarity)(
		+ docWord.toLowerCase(),
		+ targetWord.toLowerCase()
		+ );
		if (similarity >= threshold) {
		@@ -55,3 +58,3 @@ foundTargetWords.push(targetWord);
		var defaultDocumentDictionary = {
		- BEVISPAFORSTEGANGSTJENESTE: [
		+ "BEVIS P\xC5 F\xD8RSTEGANGSTJENESTE": [
		["f\xF8rstegangstjeneste", "bevis", "avtjent"],
		@@ -63,6 +66,9 @@ ["attest", "f\xF8rstegangstjeneste"],
		KOMPETANSEBEVIS: [["omfatter", "oppl\xE6ring", "utdanningsprogram"]],
		- LEGEERKLERING: [["legeerkl\xE6ring", "f\xF8dselsnummer"]]
		+ LEGEERKL\u00C6RING: [["legeerkl\xE6ring", "f\xF8dselsnummer"]]
		};
		async function classifyDocument(file, onProgress, customDocumentDictionary) {
		- const documentDictionary = { ...defaultDocumentDictionary, ...customDocumentDictionary };
		+ const documentDictionary = {
		+ ...defaultDocumentDictionary,
		+ ...customDocumentDictionary
		+ };
		let progress = 0;
		@@ -86,10 +92,17 @@ const worker = await Tesseract.createWorker(["noreng"], 1, {
		if (onProgress) onProgress(progress);
		- const { data: { text } } = await worker.recognize(imageDataUrl, {
		+ const {
		+ data: { text }
		+ } = await worker.recognize(imageDataUrl, {
		rotateAuto: true
		});
		- const targetWords = Array.from(new Set(Object.values(documentDictionary).flat(2)));
		+ const targetWords = Array.from(
		+ new Set(Object.values(documentDictionary).flat(2))
		+ );
		const targetWordsFound = findTargetWords(text, targetWords);
		progress = 0.9;
		if (onProgress) onProgress(progress);
		- const classification = determineClassification(targetWordsFound, documentDictionary);
		+ const classification = determineClassification(
		+ targetWordsFound,
		+ documentDictionary
		+ );
		progress = 1;
		@@ -106,3 +119,5 @@ if (onProgress) onProgress(progress);
		function determineClassification(targetWordsFound, documentDictionary) {
		- for (const [classification, targetWordSets] of Object.entries(documentDictionary)) {
		+ for (const [classification, targetWordSets] of Object.entries(
		+ documentDictionary
		+ )) {
		for (const targetWords of targetWordSets) {
		@@ -114,3 +129,3 @@ if (targetWords.every((word) => targetWordsFound.includes(word))) {
		}
		- return "UNKNOWN";
		+ return "UKJENT";
		}
		@@ -117,0 +132,0 @@ // Annotate the CommonJS export names for ESM import in node:

package.json

		{
		"name": "ocr-document-classification",
		- "version": "1.0.9",
		+ "version": "1.0.10",
		"description": "Document classification using tesseract.js and string-similarity-js.",
		@@ -5,0 +5,0 @@ "main": "./dist/index.js",

dist/index.mjs

Sorry, the diff of this file is not supported yet

ocr-document-classification - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics