ocr-document-classification
Advanced tools
Comparing version 1.2.8 to 1.2.9
@@ -1,49 +0,31 @@ | ||
var __defProp = Object.defineProperty; | ||
var __getOwnPropDesc = Object.getOwnPropertyDescriptor; | ||
var __getOwnPropNames = Object.getOwnPropertyNames; | ||
var __hasOwnProp = Object.prototype.hasOwnProperty; | ||
var __esm = (fn, res) => function __init() { | ||
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res; | ||
}; | ||
var __export = (target, all) => { | ||
for (var name in all) | ||
__defProp(target, name, { get: all[name], enumerable: true }); | ||
}; | ||
var __copyProps = (to, from, except, desc) => { | ||
if (from && typeof from === "object" || typeof from === "function") { | ||
for (let key of __getOwnPropNames(from)) | ||
if (!__hasOwnProp.call(to, key) && key !== except) | ||
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); | ||
} | ||
return to; | ||
}; | ||
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); | ||
// src/functions.ts | ||
import { stringSimilarity } from "string-similarity-js"; | ||
import * as Tesseract from "tesseract.js"; | ||
// src/convert.mjs | ||
var convert_exports = {}; | ||
__export(convert_exports, { | ||
convert: () => convert | ||
}); | ||
// src/convert.js | ||
import * as pdfjsLib from "pdfjs-dist"; | ||
pdfjsLib.GlobalWorkerOptions.workerSrc = "//mozilla.github.io/pdf.js/build/pdf.worker.js"; | ||
async function convert(pdfPath) { | ||
pdfjsLib.GlobalWorkerOptions.workerSrc = "//mozilla.github.io/pdf.js/build/pdf.worker.js"; | ||
const doc = await pdfjsLib.getDocument(pdfPath); | ||
const page = await doc.getPage(1); | ||
const viewport = page.getViewport({ scale: 1.5 }); | ||
const canvas = document.createElement("canvas"); | ||
const context = canvas.getContext("2d"); | ||
canvas.height = viewport.height; | ||
canvas.width = viewport.width; | ||
await page.render({ canvasContext: context, viewport }).promise; | ||
const buffer = canvas.toDataURL("image/png"); | ||
return buffer; | ||
try { | ||
const doc = await pdfjsLib.getDocument(pdfPath).promise; | ||
const page = await doc.getPage(1); | ||
const viewport = page.getViewport({ scale: 1.5 }); | ||
const canvas = document.createElement("canvas"); | ||
const context = canvas.getContext("2d"); | ||
canvas.height = viewport.height; | ||
canvas.width = viewport.width; | ||
const renderContext = { | ||
canvasContext: context, | ||
viewport | ||
}; | ||
await page.render(renderContext).promise; | ||
const buffer = canvas.toDataURL("image/png"); | ||
return buffer; | ||
} catch (error) { | ||
console.error("Error processing PDF:", error); | ||
throw error; | ||
} | ||
} | ||
var init_convert = __esm({ | ||
"src/convert.mjs"() { | ||
"use strict"; | ||
} | ||
}); | ||
// src/functions.ts | ||
import { stringSimilarity } from "string-similarity-js"; | ||
import * as Tesseract from "tesseract.js"; | ||
function findTargetWords(documentText, targetWords, threshold = 0.75) { | ||
@@ -85,6 +67,5 @@ const foundTargetWords = []; | ||
}; | ||
const { convert: convert2 } = (init_convert(), __toCommonJS(convert_exports)); | ||
if (file.type === "application/pdf") { | ||
const pdfPath = URL.createObjectURL(file); | ||
file = await convert2(pdfPath); | ||
file = await convert(pdfPath); | ||
} | ||
@@ -91,0 +72,0 @@ let progress = 0; |
{ | ||
"name": "ocr-document-classification", | ||
"version": "1.2.8", | ||
"version": "1.2.9", | ||
"description": "Document classification using tesseract.js and string-similarity-js.", | ||
@@ -29,2 +29,3 @@ "main": "./dist/index.cjs", | ||
"dependencies": { | ||
"pdfjs-dist": "^4.4.168", | ||
"string-similarity-js": "^2.1.4", | ||
@@ -31,0 +32,0 @@ "tesseract.js": "^5.1.0", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
30237
4
295
+ Added pdfjs-dist@^4.4.168
+ Added base64-js@1.5.1 (transitive)
+ Added bl@4.1.0 (transitive)
+ Added buffer@5.7.1 (transitive)
+ Added canvas@3.0.0-rc2 (transitive)
+ Added chownr@1.1.4 (transitive)
+ Added decompress-response@4.2.1, 6.0.0 (transitive)
+ Added deep-extend@0.6.0 (transitive)
+ Added detect-libc@2.0.3 (transitive)
+ Added end-of-stream@1.4.4 (transitive)
+ Added expand-template@2.0.3 (transitive)
+ Added fs-constants@1.0.0 (transitive)
+ Added github-from-package@0.0.0 (transitive)
+ Added ieee754@1.2.1 (transitive)
+ Added inherits@2.0.4 (transitive)
+ Added ini@1.3.8 (transitive)
+ Added mimic-response@2.1.0, 3.1.0 (transitive)
+ Added minimist@1.2.8 (transitive)
+ Added mkdirp-classic@0.5.3 (transitive)
+ Added napi-build-utils@1.0.2 (transitive)
+ Added node-abi@3.71.0 (transitive)
+ Added node-addon-api@7.1.1 (transitive)
+ Added once@1.4.0 (transitive)
+ Added path2d@0.2.2 (transitive)
+ Added pdfjs-dist@4.8.69 (transitive)
+ Added prebuild-install@7.1.2 (transitive)
+ Added pump@3.0.2 (transitive)
+ Added rc@1.2.8 (transitive)
+ Added readable-stream@3.6.2 (transitive)
+ Added safe-buffer@5.2.1 (transitive)
+ Added semver@7.6.3 (transitive)
+ Added simple-concat@1.0.1 (transitive)
+ Added simple-get@3.1.1, 4.0.1 (transitive)
+ Added string_decoder@1.3.0 (transitive)
+ Added strip-json-comments@2.0.1 (transitive)
+ Added tar-fs@2.1.1 (transitive)
+ Added tar-stream@2.2.0 (transitive)
+ Added tunnel-agent@0.6.0 (transitive)
+ Added util-deprecate@1.0.2 (transitive)
+ Added wrappy@1.0.2 (transitive)