Extract data from a PDF with pure JavaScript.
Inspired by https://www.npmjs.com/package/pdf-parse, which is currently unmaintained.
Install
npm install pdfdataextract
Docs
Full documentation is available on the project wiki.
Usage
import { PdfData, VerbosityLevel } from 'pdfdataextract';
import { readFileSync } from 'fs';
const file_data = readFileSync('some_pdf_file.pdf');
PdfData.extract(file_data, {
password: '123456',
pages: 1,
sort: true,
verbosity: VerbosityLevel.ERRORS,
get: {
pages: true,
text: true,
fingerprint: true,
outline: true,
metadata: true,
info: true,
permissions: true,
},
}).then((data) => {
data.pages;
data.text;
data.fingerprint;
data.outline;
data.info;
data.metadata;
data.permissions;
});
import { PdfDataExtractor, VerbosityLevel } from 'pdfdataextract';
import { readFileSync } from 'fs';
const file_data = readFileSync('some_pdf_file.pdf');
PdfDataExtractor.get(file_data, {
password: '123456',
verbosity: VerbosityLevel.ERRORS,
}).then((extractor) => {
extractor.pages;
extractor.fingerprint;
extractor.getText(1, true).then((text) => {
});
extractor.getText([2]).then((text) => {
});
extractor.getOutline().then((outline) => {
});
extractor.getMetadata().then((metadata) => {
});
extractor.getPermissions().then((permissions) => {
});
extractor.close();
});
Test
npm test
Maybe TODOs
License
MIT licensed