pdf-parser-client-side
Advanced tools
Comparing version 1.0.1 to 1.0.2
22
index.js
@@ -14,3 +14,3 @@ /* --------------------------------------------- | ||
module.exports = extractTextFromPDF = async (file) => { | ||
module.exports = extractTextFromPDF = async (file, variant) => { | ||
try { | ||
@@ -35,2 +35,22 @@ // Create a blob URL for the PDF file | ||
if (extractedText.length > 0) { | ||
// variant check | ||
if (variant === "clean") { | ||
extractedText = extractedText.replaceAll( | ||
/[^\x00-\x7F]+\ *(?:[^\x00-\x7F]| )*/g, | ||
" " | ||
); | ||
} | ||
if (variant === "alphanumeric") { | ||
extractedText = extractedText.replaceAll(/[^a-zA-Z0-9]+/g, " "); | ||
} | ||
if (variant === "alphanumericwithspace") { | ||
extractedText = extractedText.replaceAll(/[^a-zA-Z0-9 ]+/g, " "); | ||
} | ||
if (variant === "alphanumericwithspaceandpunctuation") { | ||
extractedText = extractedText.replaceAll(/[^a-zA-Z0-9 .,!?]+/g, " "); | ||
} | ||
if (variant === "alphanumericwithspaceandpunctuationandnewline") { | ||
extractedText = extractedText.replaceAll(/[^a-zA-Z0-9 .,!?]+/g, " "); | ||
} | ||
return extractedText; | ||
@@ -37,0 +57,0 @@ } |
{ | ||
"name": "pdf-parser-client-side", | ||
"version": "1.0.1", | ||
"version": "1.0.2", | ||
"description": "A lightweight easy to use package to parse text from PDF files on client side without any server dependency.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -12,5 +12,7 @@ <div align="center"> | ||
## PDF Parser Client Side | ||
A lightweight, easy-to-use package for parsing text from PDF files on the client side, without any server dependency. | ||
## How to Install ? | ||
Use npm or yarn to install this npm package | ||
@@ -21,3 +23,5 @@ | ||
``` | ||
or | ||
```js | ||
@@ -61,3 +65,46 @@ yarn add pdf-parser-client-side | ||
} | ||
``` | ||
#### `variant` Parameter | ||
The optional `variant` parameter (the second argument) selects which clean-up is applied to the extracted text before it is returned. For each supported value, every run of characters outside the retained set is replaced with a single space. If `variant` is omitted or does not match one of the values below, the text is returned unchanged. | ||
| `variant` Value | Description | Regular Expression | Retained Characters | | ||
| ----------------------------------------------- | -------------------------------------------------------------------------------------- | ---------------------------------------- | -------------------------- | ||
| `clean` | Replaces non-ASCII characters, and any spaces that follow them, with a single space. | `/[^\x00-\x7F]+\ *(?:[^\x00-\x7F]\| )*/g` | ASCII characters only | ||
| `alphanumeric` | Retains only alphanumeric characters (letters and numbers). | `/[^a-zA-Z0-9]+/g` | A-Z, a-z, 0-9 | | ||
| `alphanumericwithspace` | Retains alphanumeric characters and spaces. | `/[^a-zA-Z0-9 ]+/g` | A-Z, a-z, 0-9, space | | ||
| `alphanumericwithspaceandpunctuation` | Retains alphanumeric characters, spaces, and basic punctuation marks (.,!?). | `/[^a-zA-Z0-9 .,!?]+/g` | A-Z, a-z, 0-9, space, .,!? | ||
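| `alphanumericwithspaceandpunctuationandnewline` | Intended to retain alphanumeric characters, spaces, basic punctuation marks (.,!?), and newlines. Note: the expression shown does not include `\n`, so newlines are currently replaced with a space as well. | `/[^a-zA-Z0-9 .,!?]+/g` | A-Z, a-z, 0-9, space, .,!? | ||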
#### Example Usage | ||
```javascript | ||
import React from "react"; | ||
import extractTextFromPDF from "pdf-parser-client-side"; | ||
let extractedText = "Example text with special characters: !@#$%^&*()_+"; | ||
export default function Test() { | ||
return ( | ||
<div> | ||
<input | ||
type="file" | ||
name="" | ||
id="file-selector" | ||
accept=".pdf" | ||
onChange={(e) => { | ||
// Selecting the first file | ||
const file = e.target.files[0]; | ||
// If file exists then we will call our function | ||
if (file) { | ||
extractTextFromPDF(file, "clean").then((data) => { | ||
console.log(data); | ||
}); | ||
} | ||
}} | ||
/> | ||
</div> | ||
); | ||
} | ||
``` | ||
@@ -64,0 +111,0 @@ |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
14701
78
118