Comparing version 2.0.1 to 2.1.0
@@ -203,3 +203,3 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
// Only the last byte has to be incremented. | ||
dstLow = dstLow.substr(0, lastByte) + | ||
dstLow = dstLow.substring(0, lastByte) + | ||
String.fromCharCode(dstLow.charCodeAt(lastByte) + 1); | ||
@@ -206,0 +206,0 @@ ++low; |
@@ -190,3 +190,7 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
var xResources = xObject.dict.get('Resources'); | ||
if (isDict(xResources)) { | ||
if ( | ||
isDict(xResources) && | ||
xResources !== node && | ||
nodes.indexOf(xResources) < 0 | ||
) { | ||
nodes.push(xResources); | ||
@@ -1027,3 +1031,3 @@ } | ||
if (isName(cmapObj)) { | ||
var isIdentityMap = cmapObj.name.substr(0, 9) == 'Identity-'; | ||
var isIdentityMap = cmapObj.name.substring(0, 9) == 'Identity-'; | ||
if (!isIdentityMap) | ||
@@ -1030,0 +1034,0 @@ error('ToUnicode file cmap translation not implemented'); |
@@ -171,4 +171,4 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
function spliceString(s, offset, remove, insert) { | ||
var chunk1 = data.substr(0, offset); | ||
var chunk2 = data.substr(offset + remove); | ||
var chunk1 = data.substring(0, offset); | ||
var chunk2 = data.substring(offset + remove); | ||
return chunk1 + insert + chunk2; | ||
@@ -175,0 +175,0 @@ } |
@@ -873,2 +873,3 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
}, | ||
createRgbBuffer: ColorSpace.prototype.createRgbBuffer, | ||
isPassthrough: ColorSpace.prototype.isPassthrough, | ||
@@ -875,0 +876,0 @@ isDefaultDecode: function LabCS_isDefaultDecode(decodeMap) { |
@@ -249,3 +249,3 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
} | ||
var protocol = url.substr(0, colon); | ||
var protocol = url.substring(0, colon); | ||
switch (protocol) { | ||
@@ -1163,32 +1163,34 @@ case 'http': | ||
comObj.onmessage = function messageHandlerComObjOnMessage(event) { | ||
var data = event.data; | ||
if (data.isReply) { | ||
var callbackId = data.callbackId; | ||
if (data.callbackId in callbacks) { | ||
var callback = callbacks[callbackId]; | ||
delete callbacks[callbackId]; | ||
callback(data.data); | ||
} else { | ||
error('Cannot resolve callback ' + callbackId); | ||
} | ||
} else if (data.action in ah) { | ||
var action = ah[data.action]; | ||
if (data.callbackId) { | ||
var promise = new Promise(); | ||
promise.then(function(resolvedData) { | ||
comObj.postMessage({ | ||
isReply: true, | ||
callbackId: data.callbackId, | ||
data: resolvedData | ||
}); | ||
}); | ||
action[0].call(action[1], data.data, promise); | ||
} else { | ||
action[0].call(action[1], data.data); | ||
} | ||
} else { | ||
error('Unkown action from worker: ' + data.action); | ||
} | ||
}; | ||
if (typeof comObj === 'object') { | ||
comObj.onmessage = function messageHandlerComObjOnMessage(event) { | ||
var data = event.data; | ||
if (data.isReply) { | ||
var callbackId = data.callbackId; | ||
if (data.callbackId in callbacks) { | ||
var callback = callbacks[callbackId]; | ||
delete callbacks[callbackId]; | ||
callback(data.data); | ||
} else { | ||
error('Cannot resolve callback ' + callbackId); | ||
} | ||
} else if (data.action in ah) { | ||
var action = ah[data.action]; | ||
if (data.callbackId) { | ||
var promise = new Promise(); | ||
promise.then(function(resolvedData) { | ||
comObj.postMessage({ | ||
isReply: true, | ||
callbackId: data.callbackId, | ||
data: resolvedData | ||
}); | ||
}); | ||
action[0].call(action[1], data.data, promise); | ||
} else { | ||
action[0].call(action[1], data.data); | ||
} | ||
} else { | ||
error('Unkown action from worker: ' + data.action); | ||
} | ||
}; | ||
} | ||
} | ||
@@ -1240,3 +1242,2 @@ | ||
//MQZ Oct.18.2013 expose util methods | ||
var nodeUtil = require("util"); | ||
nodeUtil.p2jlog = log; | ||
@@ -1243,0 +1244,0 @@ nodeUtil.p2jinfo = info; |
@@ -1,22 +0,12 @@ | ||
const nodeUtil = require("util"), | ||
fs = require("fs"), | ||
path = require("path"), | ||
{ParserStream, StringifyStream} = require("./parserstream"), | ||
pkInfo = require("../package.json"), | ||
PDFParser = require("../pdfparser"); | ||
import nodeUtil from "util"; | ||
import fs from "fs"; | ||
import path from "path"; | ||
const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; | ||
import {ParserStream, StringifyStream} from "./parserstream.js"; | ||
import PDFParser from "../pdfparser.js"; | ||
const yargs = require('./p2jcmdarg') | ||
.usage(`\n${_PRO_TIMER}\n\nUsage: ${pkInfo.name} -f|--file [-o|output_dir]`) | ||
.alias('v', 'version', 'Display version.') | ||
.alias('h', 'help', 'Display brief help information.') | ||
.alias('f', 'file', '(required) Full path of input PDF file or a directory to scan for all PDF files.\n\t\t When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.') | ||
.alias('o', 'output', '(optional) Full path of output directory, must already exist.\n\t\t Current JSON file in the output folder will be replaced when file name is same.') | ||
.alias('s', 'silent', '(optional) when specified, will only log errors, otherwise verbose.') | ||
.alias('t', 'fieldTypes', '(optional) when specified, will generate .fields.json that includes fields ids and types.') | ||
.alias('c', 'content', '(optional) when specified, will generate .content.txt that includes text content from PDF.') | ||
.alias('m', 'merge', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF.') | ||
.alias('r', 'stream', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system.'); | ||
import { pkInfo, _PARSER_SIG as _PRO_TIMER } from "../pkinfo.js"; | ||
import { yargs } from "./p2jcmdarg.js"; | ||
const argv = yargs.argv; | ||
@@ -62,94 +52,83 @@ const ONLY_SHOW_VERSION = ('v' in argv); | ||
//private methods | ||
#continue(callback, err) { | ||
if (typeof callback === "function") | ||
callback(err); | ||
//private methods | ||
#generateMergedTextBlocksStream() { | ||
return new Promise( (resolve, reject) => { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), resolve, reject); | ||
this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); | ||
}); | ||
} | ||
#onPdfParserError(evtData, callback) { | ||
this.curCLI.addResultCount(evtData.parserError); | ||
this.#continue(callback, evtData.parserError); | ||
} | ||
#generateMergedTextBlocksStream(callback) { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), callback); | ||
this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); | ||
} | ||
#generateRawTextContentStream(callback) { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), callback); | ||
this.pdfParser.getRawTextContentStream().pipe(outputStream); | ||
#generateRawTextContentStream() { | ||
return new Promise( (resolve, reject) => { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), resolve, reject); | ||
this.pdfParser.getRawTextContentStream().pipe(outputStream); | ||
}); | ||
} | ||
#generateFieldsTypesStream(callback) { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), callback); | ||
this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); | ||
#generateFieldsTypesStream() { | ||
return new Promise( (resolve, reject) => { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), resolve, reject); | ||
this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); | ||
}); | ||
} | ||
#processAdditionalStreams(callback) { | ||
#processAdditionalStreams() { | ||
const outputTasks = []; | ||
if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file | ||
outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc)); | ||
outputTasks.push(this.#generateFieldsTypesStream()); | ||
} | ||
if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file | ||
outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc)); | ||
outputTasks.push(this.#generateRawTextContentStream()); | ||
} | ||
if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks | ||
outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc)); | ||
outputTasks.push(this.#generateMergedTextBlocksStream()); | ||
} | ||
let taskId = 0; | ||
function sequenceTask() { | ||
if (taskId < outputTasks.length) { | ||
outputTasks[taskId]((err, ret) => { | ||
this.curCLI.addStatusMsg(err, `[+]=> ${ret}`); | ||
taskId++; | ||
sequenceTask.call(this); | ||
}); | ||
} | ||
else | ||
this.#continue(callback); | ||
} | ||
sequenceTask.call(this); | ||
return Promise.allSettled(outputTasks); | ||
} | ||
#onPrimarySuccess(callback) { | ||
#onPrimarySuccess(resolve, reject) { | ||
this.curCLI.addResultCount(); | ||
this.#processAdditionalStreams(callback); | ||
this.#processAdditionalStreams() | ||
.then( retVal => resolve(retVal)) | ||
.catch( err => reject(err) ); | ||
} | ||
#onPrimaryError(err, callback) { | ||
#onPrimaryError(err, reject) { | ||
this.curCLI.addResultCount(err); | ||
callback(err); | ||
reject(err); | ||
} | ||
#parseOnePDFStream(callback) { | ||
this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); | ||
this.pdfParser.on("pdfParser_dataError", evtData => this.#onPdfParserError(evtData, callback)); | ||
const outputStream = fs.createWriteStream(this.outputPath); | ||
outputStream.on('finish', () => this.#onPrimarySuccess(callback)); | ||
outputStream.on('error', err => this.#onPrimaryError(err, callback)); | ||
nodeUtil.p2jinfo("Transcoding Stream " + this.inputFile + " to - " + this.outputPath); | ||
let inputStream = fs.createReadStream(this.inputPath, {bufferSize: 64 * 1024}); | ||
inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); | ||
#parseOnePDFStream() { | ||
return new Promise( (resolve, reject) => { | ||
this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); | ||
this.pdfParser.on("pdfParser_dataError", evtData => this.#onPrimaryError(evtData.parserError, reject)); | ||
const outputStream = fs.createWriteStream(this.outputPath); | ||
outputStream.on('finish', () => this.#onPrimarySuccess(resolve, reject)); | ||
outputStream.on('error', err => this.#onPrimaryError(err, reject)); | ||
nodeUtil.p2jinfo("Transcoding Stream " + this.inputFile + " to - " + this.outputPath); | ||
const inputStream = fs.createReadStream(this.inputPath, {bufferSize: 64 * 1024}); | ||
inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); | ||
}); | ||
}; | ||
#parseOnePDF(callback) { | ||
this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); | ||
this.pdfParser.on("pdfParser_dataError", evtData => this.#onPdfParserError(evtData, callback)); | ||
#parseOnePDF() { | ||
return new Promise( (resolve, reject) => { | ||
this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); | ||
this.pdfParser.on("pdfParser_dataError", evtData => this.#onPrimaryError(evtData.parserError, reject)); | ||
this.pdfParser.on("pdfParser_dataReady", evtData => { | ||
fs.writeFile(this.outputPath, JSON.stringify(evtData), err => { | ||
if(err) { | ||
this.#onPrimaryError(err, callback); | ||
} else { | ||
this.#onPrimarySuccess(callback); | ||
} | ||
this.pdfParser.on("pdfParser_dataReady", evtData => { | ||
fs.writeFile(this.outputPath, JSON.stringify(evtData), err => { | ||
if(err) { | ||
this.#onPrimaryError(err, reject); | ||
} else { | ||
this.#onPrimarySuccess(resolve, reject); | ||
} | ||
}); | ||
}); | ||
nodeUtil.p2jinfo("Transcoding File " + this.inputFile + " to - " + this.outputPath); | ||
this.pdfParser.loadPDF(this.inputPath, VERBOSITY_LEVEL); | ||
}); | ||
nodeUtil.p2jinfo("Transcoding File " + this.inputFile + " to - " + this.outputPath); | ||
this.pdfParser.loadPDF(this.inputPath, VERBOSITY_LEVEL); | ||
} | ||
@@ -209,13 +188,15 @@ | ||
processFile(callback) { | ||
let validateMsg = this.validateParams(); | ||
if (!!validateMsg) { | ||
this.#continue(callback, validateMsg); | ||
} | ||
else if (PROCESS_WITH_STREAM) { | ||
this.#parseOnePDFStream(callback); | ||
} | ||
else { | ||
this.#parseOnePDF(callback); | ||
} | ||
processFile() { | ||
return new Promise((resolve, reject) => { | ||
const validateMsg = this.validateParams(); | ||
if (!!validateMsg) { | ||
reject(validateMsg); | ||
} | ||
else { | ||
const parserFunc = PROCESS_WITH_STREAM ? this.#parseOnePDFStream : this.#parseOnePDF; | ||
parserFunc.call(this) | ||
.then( value => resolve(value) ) | ||
.catch( err => reject(err) ); | ||
} | ||
}); | ||
} | ||
@@ -228,3 +209,3 @@ | ||
class PDFCLI { | ||
export default class PDFCLI { | ||
inputCount = 0; | ||
@@ -243,8 +224,5 @@ successCount = 0; | ||
this.statusMsgs = []; | ||
this.p2j = null; | ||
} | ||
initialize() { | ||
console.time(_PRO_TIMER); | ||
initialize() { | ||
nodeUtil.verbosity(VERBOSITY_LEVEL); | ||
@@ -274,23 +252,25 @@ let retVal = true; | ||
start() { | ||
if (!this.initialize()) { | ||
console.timeEnd(_PRO_TIMER); | ||
async start() { | ||
if (!this.initialize()) | ||
return; | ||
} | ||
try { | ||
console.log("\n" + _PRO_TIMER); | ||
const inputStatus = fs.statSync(INPUT_DIR_OR_FILE); | ||
console.log(_PRO_TIMER); | ||
console.time(_PRO_TIMER); | ||
try { | ||
const inputStatus = fs.statSync(INPUT_DIR_OR_FILE); | ||
if (inputStatus.isFile()) { | ||
this.processOneFile(); | ||
this.inputCount = 1; | ||
await this.processOneFile(path.dirname(INPUT_DIR_OR_FILE), path.basename(INPUT_DIR_OR_FILE)); | ||
} | ||
else if (inputStatus.isDirectory()) { | ||
this.processOneDirectory(); | ||
else if (inputStatus.isDirectory()) { | ||
await this.processOneDirectory(path.normalize(INPUT_DIR_OR_FILE)); | ||
} | ||
} | ||
catch(e) { | ||
console.error("Exception: " + e.message); | ||
console.timeEnd(_PRO_TIMER); | ||
console.error("Exception: ", e); | ||
} | ||
finally { | ||
this.complete(); | ||
} | ||
} | ||
@@ -308,62 +288,53 @@ | ||
processOneFile() { | ||
const inputDir = path.dirname(INPUT_DIR_OR_FILE); | ||
const inputFile = path.basename(INPUT_DIR_OR_FILE); | ||
this.inputCount = 1; | ||
this.p2j = new PDFProcessor(inputDir, inputFile, this); | ||
this.p2j.processFile( err => { | ||
this.addStatusMsg(err, `${path.join(inputDir, inputFile)} => ${err ?? this.p2j.getOutputFile()}`); | ||
this.complete(); | ||
}); | ||
processOneFile(inputDir, inputFile) { | ||
return new Promise((resolve, reject) => { | ||
const p2j = new PDFProcessor(inputDir, inputFile, this); | ||
p2j.processFile() | ||
.then( retVal => { | ||
this.addStatusMsg(null, `${path.join(inputDir, inputFile)} => ${p2j.getOutputFile()}`); | ||
retVal.forEach(ret => this.addStatusMsg(null, `+ ${ret.value}`)); | ||
resolve(retVal); | ||
}) | ||
.catch(error => { | ||
this.addStatusMsg(error, `${path.join(inputDir, inputFile)} => ${error}`); | ||
reject(error); | ||
}) | ||
.finally(() => p2j.destroy()); | ||
}); | ||
} | ||
processFiles(inputDir, files) { | ||
let fId = 0; | ||
this.p2j = new PDFProcessor(inputDir, files[fId], this); | ||
this.p2j.processFile( function processPDFFile(err) { | ||
this.addStatusMsg(err, `${path.join(inputDir, files[fId])} => ${err ?? this.p2j.getOutputFile()}`); | ||
fId++; | ||
if (fId >= this.inputCount) { | ||
this.complete(); | ||
} | ||
else { | ||
if (this.p2j) { | ||
this.p2j.destroy(); | ||
this.p2j = null; | ||
} | ||
this.p2j = new PDFProcessor(inputDir, files[fId], this); | ||
this.p2j.processFile(processPDFFile.bind(this)); | ||
} | ||
}.bind(this) ); | ||
const allPromises = []; | ||
files.forEach( (file, idx) => allPromises.push(this.processOneFile(inputDir, file)) ); | ||
return Promise.allSettled(allPromises); | ||
} | ||
processOneDirectory() { | ||
let inputDir = path.normalize(INPUT_DIR_OR_FILE); | ||
processOneDirectory(inputDir) { | ||
return new Promise((resolve, reject) => { | ||
fs.readdir(inputDir, (err, files) => { | ||
if (err) { | ||
this.addStatusMsg(true, `[${inputDir}] - ${err.toString()}`); | ||
reject(err); | ||
} | ||
else { | ||
const _iChars = "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ "; | ||
const pdfFiles = files.filter( file => file.slice(-4).toLowerCase() === '.pdf' && _iChars.indexOf(file.substring(0,1)) < 0 ); | ||
fs.readdir(inputDir, (err, files) => { | ||
if (err) { | ||
this.addStatusMsg(true, `[${inputDir}] - ${err.toString()}`); | ||
this.complete(); | ||
} | ||
else { | ||
const _iChars = "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ "; | ||
const pdfFiles = files.filter( file => file.substr(-4).toLowerCase() === '.pdf' && _iChars.indexOf(file.substr(0,1)) < 0 ); | ||
this.inputCount = pdfFiles.length; | ||
if (this.inputCount > 0) { | ||
this.processFiles(inputDir, pdfFiles); | ||
} | ||
else { | ||
this.addStatusMsg(true, `[${inputDir}] - No PDF files found`); | ||
this.complete(); | ||
} | ||
} | ||
}); | ||
this.inputCount = pdfFiles.length; | ||
if (this.inputCount > 0) { | ||
this.processFiles(inputDir, pdfFiles) | ||
.then( value => resolve(value) ) | ||
.catch( err => reject(err) ); | ||
} | ||
else { | ||
this.addStatusMsg(true, `[${inputDir}] - No PDF files found`); | ||
resolve(); | ||
} | ||
} | ||
}); | ||
}); | ||
} | ||
addStatusMsg(error, oneMsg) { | ||
this.statusMsgs.push(error ? `✗ Error - ${oneMsg}` : `✓ Success - ${oneMsg}`); | ||
this.statusMsgs.push(error ? `✗ Error : ${oneMsg}` : `✓ Success : ${oneMsg}`); | ||
} | ||
@@ -375,3 +346,1 @@ | ||
} | ||
module.exports = PDFCLI; |
@@ -0,1 +1,3 @@ | ||
import { pkInfo, _PARSER_SIG as _PRO_TIMER } from "../pkinfo.js"; | ||
class CLIArgParser { | ||
@@ -136,2 +138,12 @@ args = []; | ||
module.exports = new CLIArgParser(process.argv.slice(2)); | ||
export const yargs = new CLIArgParser(process.argv.slice(2)) | ||
.usage(`\n${_PRO_TIMER}\n\nUsage: ${pkInfo.name} -f|--file [-o|output_dir]`) | ||
.alias('v', 'version', 'Display version.') | ||
.alias('h', 'help', 'Display brief help information.') | ||
.alias('f', 'file', '(required) Full path of input PDF file or a directory to scan for all PDF files.\n\t\t When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.') | ||
.alias('o', 'output', '(optional) Full path of output directory, must already exist.\n\t\t Current JSON file in the output folder will be replaced when file name is same.') | ||
.alias('s', 'silent', '(optional) when specified, will only log errors, otherwise verbose.') | ||
.alias('t', 'fieldTypes', '(optional) when specified, will generate .fields.json that includes fields ids and types.') | ||
.alias('c', 'content', '(optional) when specified, will generate .content.txt that includes text content from PDF.') | ||
.alias('m', 'merge', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF.') | ||
.alias('r', 'stream', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system.'); |
@@ -1,5 +0,5 @@ | ||
const {Transform, Readable} = require("stream"), | ||
fs = require('fs'); | ||
import { Transform, Readable } from "stream"; | ||
import fs from "fs"; | ||
class ParserStream extends Transform { | ||
export class ParserStream extends Transform { | ||
static createContentStream(jsonObj) { | ||
@@ -12,11 +12,6 @@ const rStream = new Readable({objectMode: true}); | ||
static createOutputStream(outputPath, callback) { | ||
static createOutputStream(outputPath, resolve, reject) { | ||
const outputStream = fs.createWriteStream(outputPath); | ||
outputStream.on('finish', () => { | ||
callback(null, outputPath); | ||
}); | ||
outputStream.on('error', err => { | ||
callback({"streamError": err}, outputPath); | ||
}); | ||
outputStream.on('finish', () => resolve(outputPath)); | ||
outputStream.on('error', err => reject(err) ); | ||
return outputStream; | ||
@@ -71,3 +66,3 @@ } | ||
class StringifyStream extends Transform { | ||
export class StringifyStream extends Transform { | ||
constructor(options) { | ||
@@ -85,3 +80,1 @@ super(options); | ||
} | ||
module.exports = {ParserStream, StringifyStream}; |
@@ -1,14 +0,23 @@ | ||
const nodeUtil = require("util"), | ||
{EventEmitter} = require("events"), | ||
{Blob} = require("buffer"), | ||
fs = require("fs"), | ||
DOMParser = require("@xmldom/xmldom").DOMParser, | ||
PDFCanvas = require("./pdfcanvas"), | ||
PDFUnit = require("./pdfunit"), | ||
PDFField = require("./pdffield"), | ||
PDFAnno = require("./pdfanno"), | ||
Image = require("./pdfimage"), | ||
pkInfo = require("../package.json"), | ||
PDFFont = require("./pdffont"); | ||
import nodeUtil from "util"; | ||
import fs from "fs"; | ||
import path from 'path'; | ||
import {fileURLToPath} from 'url'; | ||
import {EventEmitter} from "events"; | ||
import {Blob} from "buffer"; | ||
import {DOMParser} from "@xmldom/xmldom"; | ||
import PDFCanvas from "./pdfcanvas.js"; | ||
import PDFUnit from "./pdfunit.js"; | ||
import PDFField from "./pdffield.js"; | ||
import PDFAnno from "./pdfanno.js"; | ||
import Image from "./pdfimage.js"; | ||
import PDFFont from "./pdffont.js"; | ||
import PTIXmlParser from "./ptixmlinject.js"; | ||
import { pkInfo, _PARSER_SIG } from "../pkinfo.js"; | ||
const __filename = fileURLToPath(import.meta.url); | ||
const __dirname = path.dirname(__filename); | ||
const _pdfjsFiles = [ | ||
@@ -49,4 +58,2 @@ 'shared/util.js', | ||
const _PARSER_SIG = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; | ||
//////replacing HTML5 canvas with PDFCanvas (in-memory canvas) | ||
@@ -58,9 +65,6 @@ function createScratchCanvas(width, height) { return new PDFCanvas({}, width, height); } | ||
const _basePath = __dirname + "/../base/"; | ||
let _fileContent = ''; | ||
const baseDir = `${__dirname}/../base/`; | ||
const _baseCode = _pdfjsFiles.reduce( (preContent, fileName, idx, arr) => preContent += fs.readFileSync(baseDir + fileName, 'utf8'), ""); | ||
eval(_baseCode); | ||
_pdfjsFiles.forEach( (fieldName, idx, arr) => _fileContent += fs.readFileSync(_basePath + fieldName, 'utf8') ); | ||
eval(_fileContent); | ||
////////////////////////////////start of helper classes | ||
@@ -192,3 +196,3 @@ class PDFPageParser { | ||
////////////////////////////////Start of Node.js Module | ||
class PDFJSClass extends EventEmitter { | ||
export default class PDFJSClass extends EventEmitter { | ||
pdfDocument = null; | ||
@@ -243,3 +247,2 @@ pages = null; | ||
let PTIXmlParser = require('./ptixmlinject'); | ||
this.ptiParser = new PTIXmlParser(); | ||
@@ -445,5 +448,2 @@ this.ptiParser.parseXml(fieldInfoXMLPath, err => { | ||
} | ||
module.exports = PDFJSClass; | ||
} |
@@ -1,2 +0,2 @@ | ||
const nodeUtil = require("util"); | ||
import nodeUtil from "util"; | ||
@@ -172,3 +172,3 @@ //BEGIN - MQZ 9/19/2012. Helper functions to parse acroForm elements | ||
class PDFAnno { | ||
export default class PDFAnno { | ||
static processAnnotation(annotation, item) { | ||
@@ -199,5 +199,2 @@ if (item.fieldType == 'Btn') { //PDF Spec p.675 | ||
} | ||
} | ||
module.exports = PDFAnno; | ||
} |
@@ -1,5 +0,5 @@ | ||
const nodeUtil = require("util"), | ||
PDFLine = require('./pdfline'), | ||
PDFFill = require('./pdffill'), | ||
PDFFont = require('./pdffont'); | ||
import nodeUtil from "util"; | ||
import PDFLine from "./pdfline.js"; | ||
import PDFFill from "./pdffill.js"; | ||
import PDFFont from "./pdffont.js"; | ||
@@ -74,3 +74,3 @@ // alias some functions to make (compiled) code shorter | ||
if (guts.length == 4 && styleString.substr(3, 1) == 'a') { | ||
if (guts.length == 4 && styleString.substring(3, 4) == 'a') { | ||
alpha = guts[3]; | ||
@@ -171,3 +171,3 @@ } | ||
*/ | ||
class CanvasRenderingContext2D_ { | ||
export default class CanvasRenderingContext2D_ { | ||
constructor(canvasTarget, scaledWidth, scaledHeight) { | ||
@@ -600,8 +600,2 @@ this.m_ = createMatrixIdentity(); | ||
} | ||
} | ||
// set up externs | ||
module.exports = CanvasRenderingContext2D_; | ||
// CanvasRenderingContext2D = CanvasRenderingContext2D_; | ||
// CanvasGradient = CanvasGradient_; | ||
// CanvasPattern = CanvasPattern_; | ||
} |
@@ -1,2 +0,2 @@ | ||
const kColors = [ | ||
export const kColors = [ | ||
'#000000', // 0 | ||
@@ -41,3 +41,3 @@ '#ffffff', // 1 | ||
const kFontFaces = [ | ||
export const kFontFaces = [ | ||
"quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font | ||
@@ -51,3 +51,3 @@ "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font | ||
const kFontStyles = [ | ||
export const kFontStyles = [ | ||
// Face Size Bold Italic StyleID(Comment) | ||
@@ -116,5 +116,2 @@ // ----- ---- ---- ----- ----------------- | ||
[5, 12, 0, 0] //60 | ||
]; | ||
module.exports = {kColors, kFontFaces, kFontStyles}; | ||
]; |
@@ -1,3 +0,3 @@ | ||
const nodeUtil = require("util"), | ||
PDFUnit = require("./pdfunit"); | ||
import nodeUtil from "util"; | ||
import PDFUnit from "./pdfunit.js"; | ||
@@ -8,3 +8,3 @@ const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user | ||
class PDFField { | ||
export default class PDFField { | ||
static tabIndex = 0; | ||
@@ -299,5 +299,2 @@ | ||
} | ||
} | ||
module.exports = PDFField; | ||
} |
@@ -0,6 +1,5 @@ | ||
import nodeUtil from "util"; | ||
import PDFUnit from "./pdfunit.js"; | ||
const nodeUtil = require("util"), | ||
PDFUnit = require("./pdfunit"); | ||
class PDFFill{ | ||
export default class PDFFill{ | ||
// constructor | ||
@@ -34,5 +33,2 @@ constructor(x, y, width, height, color) { | ||
} | ||
} | ||
module.exports = PDFFill; | ||
} |
@@ -1,4 +0,4 @@ | ||
const nodeUtil = require("util"), | ||
PDFUnit = require("./pdfunit"), | ||
{kFontFaces, kFontStyles} = require("./pdfconst"); | ||
import nodeUtil from "util"; | ||
import PDFUnit from "./pdfunit.js"; | ||
import {kFontFaces, kFontStyles} from "./pdfconst.js"; | ||
@@ -9,3 +9,3 @@ const _boldSubNames = ["bd", "bold", "demi", "black"]; | ||
class PDFFont { | ||
export default class PDFFont { | ||
#initTypeName() { | ||
@@ -317,4 +317,2 @@ let typeName = (this.fontObj.name || this.fontObj.fallbackName); | ||
} | ||
} | ||
module.exports = PDFFont; | ||
} |
class PDFImage { | ||
export default class PDFImage { | ||
#_src = ''; | ||
@@ -33,4 +33,2 @@ #_onload = null; | ||
} | ||
module.exports = PDFImage; | ||
} |
@@ -1,5 +0,5 @@ | ||
const nodeUtil = require("util"), | ||
PDFUnit = require("./pdfunit"); | ||
import nodeUtil from "util"; | ||
import PDFUnit from "./pdfunit.js"; | ||
class PDFLine { | ||
export default class PDFLine { | ||
constructor(x1, y1, x2, y2, lineWidth, color, dashed) { | ||
@@ -64,5 +64,2 @@ this.x1 = x1; | ||
} | ||
} | ||
module.exports = PDFLine; | ||
} |
@@ -1,2 +0,2 @@ | ||
const {kColors} = require("./pdfconst"); | ||
import {kColors} from "./pdfconst.js"; | ||
@@ -11,3 +11,3 @@ const dpi = 96.0; | ||
class PDFUnit { | ||
export default class PDFUnit { | ||
static toFixedFloat(fNum) { | ||
@@ -78,5 +78,2 @@ return parseFloat(fNum.toFixed(3)); | ||
} | ||
} | ||
module.exports = PDFUnit; | ||
} |
@@ -1,5 +0,5 @@ | ||
const fs = require("fs"), | ||
DOMParser = require("@xmldom/xmldom").DOMParser; | ||
import fs from "fs"; | ||
import { DOMParser } from "@xmldom/xmldom"; | ||
class PTIXmlParser { | ||
export default class PTIXmlParser { | ||
xmlData = null; | ||
@@ -74,6 +74,2 @@ ptiPageArray = []; | ||
} | ||
} | ||
module.exports = PTIXmlParser; | ||
} |
@@ -7,2 +7,50 @@ # Changelog | ||
## [0.8.2](https://github.com/xmldom/xmldom/compare/0.8.1...0.8.2) | ||
### Fixed | ||
- fix(dom): Serialize `>` as specified (#395) [`#58`](https://github.com/xmldom/xmldom/issues/58) | ||
### Other | ||
- docs: Add `nodeType` values to public interface description [`#396`](https://github.com/xmldom/xmldom/pull/396) | ||
- test: Add executable examples for node and typescript [`#317`](https://github.com/xmldom/xmldom/pull/317) | ||
- fix(dom): Serialize `>` as specified [`#395`](https://github.com/xmldom/xmldom/pull/395) | ||
- chore: Add minimal `Object.assign` ponyfill [`#379`](https://github.com/xmldom/xmldom/pull/379) | ||
- docs: Refine release documentation [`#378`](https://github.com/xmldom/xmldom/pull/378) | ||
- chore: update various dev dependencies | ||
Thank you [@niklasl](https://github.com/niklasl), [@cburatto](https://github.com/cburatto), [@SheetJSDev](https://github.com/SheetJSDev), [@pyrsmk](https://github.com/pyrsmk) for your contributions | ||
## [0.8.1](https://github.com/xmldom/xmldom/compare/0.8.0...0.8.1) | ||
### Fixes | ||
- Only use own properties in entityMap [`#374`](https://github.com/xmldom/xmldom/pull/374) | ||
### Docs | ||
- Add security policy [`#365`](https://github.com/xmldom/xmldom/pull/365) | ||
- changelog: Correct contributor name and link [`#366`](https://github.com/xmldom/xmldom/pull/366) | ||
- Describe release/publish steps [`#358`](https://github.com/xmldom/xmldom/pull/358), [`#376`](https://github.com/xmldom/xmldom/pull/376) | ||
- Add snyk package health badge [`#360`](https://github.com/xmldom/xmldom/pull/360) | ||
## [0.8.0](https://github.com/xmldom/xmldom/compare/0.7.5...0.8.0) | ||
### Fixed | ||
- Normalize all line endings according to XML specs [1.0](https://w3.org/TR/xml/#sec-line-ends) and [1.1](https://www.w3.org/TR/xml11/#sec-line-ends) \ | ||
BREAKING CHANGE: Certain combination of line break characters are normalized to a single `\n` before parsing takes place and will no longer be preserved. | ||
- [`#303`](https://github.com/xmldom/xmldom/issues/303) / [`#307`](https://github.com/xmldom/xmldom/pull/307) | ||
- [`#49`](https://github.com/xmldom/xmldom/issues/49), [`#97`](https://github.com/xmldom/xmldom/issues/97), [`#324`](https://github.com/xmldom/xmldom/issues/324) / [`#314`](https://github.com/xmldom/xmldom/pull/314) | ||
- XMLSerializer: Preserve whitespace character references [`#284`](https://github.com/xmldom/xmldom/issues/284) / [`#310`](https://github.com/xmldom/xmldom/pull/310) \ | ||
BREAKING CHANGE: If you relied on the not spec compliant preservation of literal `\t`, `\n` or `\r` in **attribute values**. | ||
To preserve those you will have to create XML that instead contains the correct numerical (or hexadecimal) equivalent (e.g. `	`, `
`, `
`). | ||
- Drop deprecated exports `DOMImplementation` and `XMLSerializer` from `lib/dom-parser.js` [#53](https://github.com/xmldom/xmldom/issues/53) / [`#309`](https://github.com/xmldom/xmldom/pull/309) | ||
BREAKING CHANGE: Use the one provided by the main package export. | ||
- dom: Remove all links as part of `removeChild` [`#343`](https://github.com/xmldom/xmldom/issues/343) / [`#355`](https://github.com/xmldom/xmldom/pull/355) | ||
### Chore | ||
- ci: Restore latest tested node version to 16.x [`#325`](https://github.com/xmldom/xmldom/pull/325) | ||
- ci: Split test and lint steps into jobs [`#111`](https://github.com/xmldom/xmldom/issues/111) / [`#304`](https://github.com/xmldom/xmldom/pull/304) | ||
- Pinned and updated devDependencies | ||
Thank you [@marrus-sh](https://github.com/marrus-sh), [@victorandree](https://github.com/victorandree), [@mdierolf](https://github.com/mdierolf), [@tsabbay](https://github.com/tsabbay), [@fatihpense](https://github.com/fatihpense) for your contributions | ||
## 0.7.5 | ||
@@ -24,3 +72,3 @@ | ||
- Restore ability to parse `__prototype__` attributes [`#315`](https://github.com/xmldom/xmldom/pull/315) | ||
Thank you [@dsimsonOMF](https://github.com/dsimsonOMF) | ||
Thank you [@dsimpsonOMF](https://github.com/dsimpsonOMF) | ||
@@ -27,0 +75,0 @@ ## 0.7.3 |
@@ -26,2 +26,27 @@ 'use strict' | ||
/**
 * A minimal stand-in for `Object.assign`, which can not be relied upon
 * in every environment this library supports.
 * Copies all own enumerable properties of `source` onto `target`.
 *
 * @param {Object} target the object receiving the properties
 * @param {Object | null | undefined} source the object whose properties are copied
 *
 * @returns {Object} target
 * @throws TypeError if target is not an object
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/assign
 * @see https://tc39.es/ecma262/multipage/fundamental-objects.html#sec-object.assign
 */
function assign(target, source) {
  if (target === null || typeof target !== 'object') {
    throw new TypeError('target is not an object')
  }
  // a nullish source contributes nothing (matches `Object.assign`)
  if (source !== null && source !== undefined) {
    // `Object.keys` only lists own enumerable properties,
    // so no `hasOwnProperty` filtering is required here
    var keys = Object.keys(Object(source))
    for (var i = 0; i < keys.length; i++) {
      target[keys[i]] = source[keys[i]]
    }
  }
  return target
}
/** | ||
* All mime types that are allowed as input to `DOMParser.parseFromString` | ||
@@ -143,4 +168,5 @@ * | ||
exports.assign = assign; | ||
exports.freeze = freeze; | ||
exports.MIME_TYPE = MIME_TYPE; | ||
exports.NAMESPACE = NAMESPACE; |
@@ -13,2 +13,60 @@ var conventions = require("./conventions"); | ||
/**
 * Normalizes line endings according to https://www.w3.org/TR/xml11/#sec-line-ends:
 *
 * > XML parsed entities are often stored in computer files which,
 * > for editing convenience, are organized into lines.
 * > These lines are typically separated by some combination
 * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
 * >
 * > To simplify the tasks of applications, the XML processor must behave
 * > as if it normalized all line breaks in external parsed entities (including the document entity)
 * > on input, before parsing, by translating all of the following to a single #xA character:
 * >
 * > 1. the two-character sequence #xD #xA
 * > 2. the two-character sequence #xD #x85
 * > 3. the single character #x85
 * > 4. the single character #x2028
 * > 5. any #xD character that is not immediately followed by #xA or #x85.
 *
 * @param {string} input
 * @returns {string}
 */
function normalizeLineEndings(input) {
  // One pass covers all five cases from the spec:
  // `\r` with an optional trailing `\n` or NEL (#x85) handles 1, 2 and 5;
  // a lone NEL or LINE SEPARATOR (#x2028) handles 3 and 4.
  return input.replace(/\r[\n\u0085]?|[\u0085\u2028]/g, '\n')
}
/** | ||
* @typedef Locator | ||
* @property {number} [columnNumber] | ||
* @property {number} [lineNumber] | ||
*/ | ||
/** | ||
* @typedef DOMParserOptions | ||
* @property {DOMHandler} [domBuilder] | ||
* @property {Function} [errorHandler] | ||
* @property {(string) => string} [normalizeLineEndings] used to replace line endings before parsing | ||
* defaults to `normalizeLineEndings` | ||
* @property {Locator} [locator] | ||
* @property {Record<string, string>} [xmlns] | ||
* | ||
* @see normalizeLineEndings | ||
*/ | ||
/** | ||
* The DOMParser interface provides the ability to parse XML or HTML source code | ||
* from a string into a DOM `Document`. | ||
* | ||
* _xmldom is different from the spec in that it allows an `options` parameter, | ||
* to override the default behavior._ | ||
* | ||
* @param {DOMParserOptions} [options] | ||
* @constructor | ||
* | ||
* @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser | ||
* @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization | ||
*/ | ||
function DOMParser(options){ | ||
@@ -37,6 +95,11 @@ this.options = options ||{locator:{}}; | ||
defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML; | ||
if(source && typeof source === 'string'){ | ||
sax.parse(source,defaultNSMap,entityMap); | ||
}else{ | ||
sax.errorHandler.error("invalid doc source"); | ||
var normalize = options.normalizeLineEndings || normalizeLineEndings; | ||
if (source && typeof source === 'string') { | ||
sax.parse( | ||
normalize(source), | ||
defaultNSMap, | ||
entityMap | ||
) | ||
} else { | ||
sax.errorHandler.error('invalid doc source') | ||
} | ||
@@ -260,12 +323,3 @@ return domBuilder.doc; | ||
exports.__DOMHandler = DOMHandler; | ||
exports.normalizeLineEndings = normalizeLineEndings; | ||
exports.DOMParser = DOMParser; | ||
/** | ||
* @deprecated Import/require from main entry point instead | ||
*/ | ||
exports.DOMImplementation = dom.DOMImplementation; | ||
/** | ||
* @deprecated Import/require from main entry point instead | ||
*/ | ||
exports.XMLSerializer = dom.XMLSerializer; |
@@ -594,3 +594,15 @@ var conventions = require("./conventions"); | ||
function _onUpdateChild(doc,el,newChild){ | ||
/** | ||
* Updates `el.childNodes`, updating the indexed items and its `length`. | ||
* Passing `newChild` means it will be appended. | ||
* Otherwise it's assumed that an item has been removed, | ||
* and `el.firstChild` and its `.nextSibling` are used | ||
* to walk the current list of child nodes. | ||
* | ||
* @param {Document} doc | ||
* @param {Node} el | ||
* @param {Node} [newChild] | ||
* @private | ||
*/ | ||
function _onUpdateChild (doc, el, newChild) { | ||
if(doc && doc._inc){ | ||
@@ -600,13 +612,13 @@ doc._inc++; | ||
var cs = el.childNodes; | ||
if(newChild){ | ||
if (newChild) { | ||
cs[cs.length++] = newChild; | ||
}else{ | ||
//console.log(1) | ||
} else { | ||
var child = el.firstChild; | ||
var i = 0; | ||
while(child){ | ||
while (child) { | ||
cs[i++] = child; | ||
child =child.nextSibling; | ||
child = child.nextSibling; | ||
} | ||
cs.length = i; | ||
delete cs[cs.length]; | ||
} | ||
@@ -617,23 +629,30 @@ } | ||
/** | ||
* attributes; | ||
* children; | ||
* | ||
* writeable properties: | ||
* nodeValue,Attr:value,CharacterData:data | ||
* prefix | ||
* Removes the connections between `parentNode` and `child` | ||
* and any existing `child.previousSibling` or `child.nextSibling`. | ||
* | ||
* @see https://github.com/xmldom/xmldom/issues/135 | ||
* @see https://github.com/xmldom/xmldom/issues/145 | ||
* | ||
* @param {Node} parentNode | ||
* @param {Node} child | ||
* @returns {Node} the child that was removed. | ||
* @private | ||
*/ | ||
function _removeChild(parentNode,child){ | ||
function _removeChild (parentNode, child) { | ||
var previous = child.previousSibling; | ||
var next = child.nextSibling; | ||
if(previous){ | ||
if (previous) { | ||
previous.nextSibling = next; | ||
}else{ | ||
parentNode.firstChild = next | ||
} else { | ||
parentNode.firstChild = next; | ||
} | ||
if(next){ | ||
if (next) { | ||
next.previousSibling = previous; | ||
}else{ | ||
} else { | ||
parentNode.lastChild = previous; | ||
} | ||
_onUpdateChild(parentNode.ownerDocument,parentNode); | ||
child.parentNode = null; | ||
child.previousSibling = null; | ||
child.nextSibling = null; | ||
_onUpdateChild(parentNode.ownerDocument, parentNode); | ||
return child; | ||
@@ -684,23 +703,31 @@ } | ||
} | ||
function _appendSingleChild(parentNode,newChild){ | ||
var cp = newChild.parentNode; | ||
if(cp){ | ||
var pre = parentNode.lastChild; | ||
cp.removeChild(newChild);//remove and update | ||
var pre = parentNode.lastChild; | ||
/** | ||
* Appends `newChild` to `parentNode`. | ||
* If `newChild` is already connected to a `parentNode` it is first removed from it. | ||
* | ||
* @see https://github.com/xmldom/xmldom/issues/135 | ||
* @see https://github.com/xmldom/xmldom/issues/145 | ||
* @param {Node} parentNode | ||
* @param {Node} newChild | ||
* @returns {Node} | ||
* @private | ||
*/ | ||
function _appendSingleChild (parentNode, newChild) { | ||
if (newChild.parentNode) { | ||
newChild.parentNode.removeChild(newChild); | ||
} | ||
var pre = parentNode.lastChild; | ||
newChild.parentNode = parentNode; | ||
newChild.previousSibling = pre; | ||
newChild.previousSibling = parentNode.lastChild; | ||
newChild.nextSibling = null; | ||
if(pre){ | ||
pre.nextSibling = newChild; | ||
}else{ | ||
if (newChild.previousSibling) { | ||
newChild.previousSibling.nextSibling = newChild; | ||
} else { | ||
parentNode.firstChild = newChild; | ||
} | ||
parentNode.lastChild = newChild; | ||
_onUpdateChild(parentNode.ownerDocument,parentNode,newChild); | ||
_onUpdateChild(parentNode.ownerDocument, parentNode, newChild); | ||
return newChild; | ||
//console.log("__aa",parentNode.lastChild.nextSibling == null) | ||
} | ||
Document.prototype = { | ||
@@ -1158,8 +1185,15 @@ //implementation : null, | ||
* Well-formed constraint: No < in Attribute Values | ||
* The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a <. | ||
* @see https://www.w3.org/TR/xml/#CleanAttrVals | ||
* @see https://www.w3.org/TR/xml/#NT-AttValue | ||
* > The replacement text of any entity referred to directly or indirectly | ||
* > in an attribute value must not contain a <. | ||
* @see https://www.w3.org/TR/xml11/#CleanAttrVals | ||
* @see https://www.w3.org/TR/xml11/#NT-AttValue | ||
* | ||
* Literal whitespace characters other than space that appear in attribute values | ||
* are serialized as their entity references, so they will be preserved. | ||
* (In contrast to whitespace literals in the input which are normalized to spaces) | ||
* @see https://www.w3.org/TR/xml11/#AVNormalize | ||
* @see https://w3c.github.io/DOM-Parsing/#serializing-an-element-s-attributes | ||
*/ | ||
function addSerializedAttribute(buf, qualifiedName, value) { | ||
buf.push(' ', qualifiedName, '="', value.replace(/[<&"]/g,_xmlEncoder), '"') | ||
buf.push(' ', qualifiedName, '="', value.replace(/[<>&"\t\n\r]/g, _xmlEncoder), '"') | ||
} | ||
@@ -1309,6 +1343,6 @@ | ||
* @see https://www.w3.org/TR/xml/#NT-CharData | ||
* @see https://w3c.github.io/DOM-Parsing/#xml-serializing-a-text-node | ||
*/ | ||
return buf.push(node.data | ||
.replace(/[<&]/g,_xmlEncoder) | ||
.replace(/]]>/g, ']]>') | ||
.replace(/[<&>]/g,_xmlEncoder) | ||
); | ||
@@ -1315,0 +1349,0 @@ case CDATA_SECTION_NODE: |
@@ -15,3 +15,3 @@ var NAMESPACE = require("./conventions").NAMESPACE; | ||
var S_TAG = 0;//tag name offerring | ||
var S_ATTR = 1;//attr name offerring | ||
var S_ATTR = 1;//attr name offerring | ||
var S_ATTR_SPACE=2;//attr name end and space offer | ||
@@ -40,3 +40,3 @@ var S_EQ = 3;//=space? | ||
function XMLReader(){ | ||
} | ||
@@ -70,4 +70,4 @@ | ||
var k = a.slice(1,-1); | ||
if(k in entityMap){ | ||
return entityMap[k]; | ||
if (Object.hasOwnProperty.call(entityMap, k)) { | ||
return entityMap[k]; | ||
}else if(k.charAt(0) === '#'){ | ||
@@ -101,3 +101,3 @@ return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x'))) | ||
var locator = domBuilder.locator; | ||
var parseStack = [{currentNSMap:defaultNSMapCopy}] | ||
@@ -127,3 +127,3 @@ var closeMap = {}; | ||
if(end<0){ | ||
tagName = source.substring(tagStart+2).replace(/[\s<].*/,''); | ||
@@ -153,3 +153,3 @@ errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName); | ||
} | ||
end++; | ||
@@ -173,4 +173,4 @@ break; | ||
var len = el.length; | ||
if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){ | ||
@@ -243,3 +243,11 @@ el.closed = true; | ||
} | ||
el.addValue(qname, value, startIndex) | ||
el.addValue( | ||
qname, | ||
// @see https://www.w3.org/TR/xml/#AVNormalize | ||
// since the xmldom sax parser does not "interpret" DTD the following is not implemented: | ||
// - recursive replacement of (DTD) entity references | ||
// - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA | ||
value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer), | ||
startIndex | ||
) | ||
} | ||
@@ -275,3 +283,3 @@ var attrName; | ||
if(p>0){ | ||
value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); | ||
value = source.slice(start, p); | ||
addAttribute(attrName, value, start-1); | ||
@@ -284,6 +292,4 @@ s = S_ATTR_END; | ||
}else if(s == S_ATTR_NOQUOT_VALUE){ | ||
value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); | ||
//console.log(attrName,value,start,p) | ||
value = source.slice(start, p); | ||
addAttribute(attrName, value, start); | ||
//console.dir(el) | ||
errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!'); | ||
@@ -342,3 +348,3 @@ start = p+1; | ||
errorHandler.warning('attribute "'+value+'" missed quot(")!'); | ||
addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start) | ||
addAttribute(attrName, value, start) | ||
}else{ | ||
@@ -371,3 +377,3 @@ if(!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)){ | ||
case S_ATTR_NOQUOT_VALUE: | ||
var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); | ||
var value = source.slice(start, p); | ||
errorHandler.warning('attribute "'+value+'" missed quot(")!!'); | ||
@@ -444,3 +450,3 @@ addAttribute(attrName, value, start) | ||
a.localName = localName ; | ||
//prefix == null for no ns prefix attribute | ||
//prefix == null for no ns prefix attribute | ||
if(nsPrefix !== false){//hack!! | ||
@@ -455,3 +461,3 @@ if(localNSMap == null){ | ||
a.uri = NAMESPACE.XMLNS | ||
domBuilder.startPrefixMapping(nsPrefix, value) | ||
domBuilder.startPrefixMapping(nsPrefix, value) | ||
} | ||
@@ -468,3 +474,3 @@ } | ||
a.uri = currentNSMap[prefix || ''] | ||
//{console.log('###'+a.qName,domBuilder.locator.systemId+'',currentNSMap,a.uri)} | ||
@@ -491,3 +497,3 @@ } | ||
for(prefix in localNSMap){ | ||
domBuilder.endPrefixMapping(prefix) | ||
domBuilder.endPrefixMapping(prefix) | ||
} | ||
@@ -519,3 +525,3 @@ } | ||
//} | ||
} | ||
@@ -537,3 +543,3 @@ } | ||
return pos<elStartEnd; | ||
//} | ||
//} | ||
} | ||
@@ -566,7 +572,7 @@ function _copy(source,target){ | ||
domBuilder.characters(source,start+9,end-start-9); | ||
domBuilder.endCDATA() | ||
domBuilder.endCDATA() | ||
return end+3; | ||
} | ||
//<!DOCTYPE | ||
//startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId) | ||
//startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId) | ||
var matchs = split(source,start); | ||
@@ -589,3 +595,3 @@ var len = matchs.length; | ||
domBuilder.endDTD(); | ||
return lastMatch.index+lastMatch[0].length | ||
@@ -639,3 +645,3 @@ } | ||
// if(localName){ | ||
// | ||
// | ||
// }else{ | ||
@@ -642,0 +648,0 @@ // var qName = uri |
{ | ||
"name": "@xmldom/xmldom", | ||
"version": "0.7.5", | ||
"version": "0.8.2", | ||
"description": "A pure JavaScript W3C standard-based (XML DOM Level 2 Core) DOMParser and XMLSerializer module.", | ||
@@ -26,2 +26,3 @@ "keywords": [ | ||
"readme.md", | ||
"SECURITY.md", | ||
"index.d.ts", | ||
@@ -32,6 +33,9 @@ "lib" | ||
"lint": "eslint lib test", | ||
"changelog": "auto-changelog --unreleased-only", | ||
"start": "nodemon --watch package.json --watch lib --watch test --exec 'npm --silent run test && npm --silent run lint'", | ||
"stryker": "stryker run", | ||
"stryker:dry-run": "stryker run -m '' --reporters progress", | ||
"test": "jest" | ||
"test": "jest", | ||
"version": "./changelog-has-version.sh", | ||
"release": "np --no-yarn" | ||
}, | ||
@@ -43,13 +47,15 @@ "engines": { | ||
"devDependencies": { | ||
"@stryker-mutator/core": "^5.2.2", | ||
"eslint": "^7.32.0", | ||
"eslint-config-prettier": "^8.3.0", | ||
"eslint-plugin-es5": "^1.5.0", | ||
"eslint-plugin-prettier": "^3.4.1", | ||
"get-stream": "^6.0.1", | ||
"jest": "^27.0.6", | ||
"nodemon": "^2.0.12", | ||
"prettier": "^2.3.2", | ||
"xmltest": "^1.5.0", | ||
"yauzl": "^2.10.0" | ||
"@stryker-mutator/core": "5.6.1", | ||
"auto-changelog": "2.4.0", | ||
"eslint": "8.12.0", | ||
"eslint-config-prettier": "8.5.0", | ||
"eslint-plugin-es5": "1.5.0", | ||
"eslint-plugin-prettier": "4.0.0", | ||
"get-stream": "6.0.1", | ||
"jest": "27.5.1", | ||
"nodemon": "2.0.15", | ||
"np": "7.6.1", | ||
"prettier": "2.6.2", | ||
"xmltest": "1.5.0", | ||
"yauzl": "2.10.0" | ||
}, | ||
@@ -59,3 +65,9 @@ "bugs": { | ||
}, | ||
"license": "MIT" | ||
"license": "MIT", | ||
"auto-changelog": { | ||
"prepend": true, | ||
"remote": "upstream", | ||
"tagPrefix": "", | ||
"template": "./auto-changelog.hbs" | ||
} | ||
} |
@@ -6,4 +6,5 @@ # @xmldom/xmldom | ||
[![license](https://img.shields.io/npm/l/@xmldom/xmldom?color=blue&style=flat-square)](LICENSE) | ||
[![license(MIT)](https://img.shields.io/npm/l/@xmldom/xmldom?color=blue&style=flat-square)](https://github.com/xmldom/xmldom/blob/master/LICENSE) | ||
[![npm](https://img.shields.io/npm/v/@xmldom/xmldom?style=flat-square)](https://www.npmjs.com/package/@xmldom/xmldom) | ||
[![snyk.io package health](https://snyk.io/advisor/npm-package/@xmldom/xmldom/badge.svg)](https://snyk.io/advisor/npm-package/@xmldom/xmldom) | ||
[![bug issues](https://img.shields.io/github/issues/xmldom/xmldom/bug?color=red&style=flat-square)](https://github.com/xmldom/xmldom/issues?q=is%3Aissue+is%3Aopen+label%3Abug) | ||
@@ -44,24 +45,19 @@ [![help-wanted issues](https://img.shields.io/github/issues/xmldom/xmldom/help-wanted?color=darkgreen&style=flat-square)](https://github.com/xmldom/xmldom/issues?q=is%3Aissue+is%3Aopen+label%3Ahelp-wanted) | ||
[In NodeJS](examples/nodejs/src/index.js) | ||
```javascript | ||
const { DOMParser } = require('@xmldom/xmldom') | ||
const { DOMParser, XMLSerializer } = require('@xmldom/xmldom') | ||
const doc = new DOMParser().parseFromString( | ||
'<xml xmlns="a" xmlns:c="./lite">\n' + | ||
'\t<child>test</child>\n' + | ||
'\t<child></child>\n' + | ||
'\t<child/>\n' + | ||
'</xml>', | ||
'text/xml' | ||
) | ||
doc.documentElement.setAttribute('x', 'y') | ||
doc.documentElement.setAttributeNS('./lite', 'c:x', 'y2') | ||
console.info(doc) | ||
const source = `<xml xmlns="a"> | ||
<child>test</child> | ||
<child/> | ||
</xml>` | ||
const nsAttr = doc.documentElement.getAttributeNS('./lite', 'x') | ||
console.info(nsAttr) | ||
const doc = new DOMParser().parseFromString(source, 'text/xml') | ||
const serialized = new XMLSerializer().serializeToString(doc) | ||
``` | ||
Note: in Typescript and ES6 you can use the import approach, as follows: | ||
Note: in TypeScript ~~and ES6~~ (see #316) you can use the `import` approach, as follows: | ||
```javascript | ||
```typescript | ||
import { DOMParser } from '@xmldom/xmldom' | ||
@@ -107,168 +103,192 @@ ``` | ||
* [Node](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1950641247) | ||
attribute: | ||
nodeValue|prefix | ||
readonly attribute: | ||
nodeName|nodeType|parentNode|childNodes|firstChild|lastChild|previousSibling|nextSibling|attributes|ownerDocument|namespaceURI|localName | ||
method: | ||
insertBefore(newChild, refChild) | ||
replaceChild(newChild, oldChild) | ||
removeChild(oldChild) | ||
appendChild(newChild) | ||
hasChildNodes() | ||
cloneNode(deep) | ||
normalize() | ||
isSupported(feature, version) | ||
hasAttributes() | ||
* [Node](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1950641247) | ||
readonly class properties (aka `NodeType`), | ||
these can be accessed from any `Node` instance `node`: | ||
`if (node.nodeType === node.ELEMENT_NODE) {...` | ||
1. `ELEMENT_NODE` (`1`) | ||
2. `ATTRIBUTE_NODE` (`2`) | ||
3. `TEXT_NODE` (`3`) | ||
4. `CDATA_SECTION_NODE` (`4`) | ||
5. `ENTITY_REFERENCE_NODE` (`5`) | ||
6. `ENTITY_NODE` (`6`) | ||
7. `PROCESSING_INSTRUCTION_NODE` (`7`) | ||
8. `COMMENT_NODE` (`8`) | ||
9. `DOCUMENT_NODE` (`9`) | ||
10. `DOCUMENT_TYPE_NODE` (`10`) | ||
11. `DOCUMENT_FRAGMENT_NODE` (`11`) | ||
12. `NOTATION_NODE` (`12`) | ||
attribute: | ||
- `nodeValue` | `prefix` | ||
readonly attribute: | ||
- `nodeName` | `nodeType` | `parentNode` | `childNodes` | `firstChild` | `lastChild` | `previousSibling` | `nextSibling` | `attributes` | `ownerDocument` | `namespaceURI` | `localName` | ||
method: | ||
* `insertBefore(newChild, refChild)` | ||
* `replaceChild(newChild, oldChild)` | ||
* `removeChild(oldChild)` | ||
* `appendChild(newChild)` | ||
* `hasChildNodes()` | ||
* `cloneNode(deep)` | ||
* `normalize()` | ||
* `isSupported(feature, version)` | ||
* `hasAttributes()` | ||
* [DOMException](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/ecma-script-binding.html) | ||
The DOMException class has the following constants (and `value` of type `Number`): | ||
1. `DOMException.INDEX_SIZE_ERR` (`1`) | ||
1. `DOMException.DOMSTRING_SIZE_ERR` (`2`) | ||
1. `DOMException.HIERARCHY_REQUEST_ERR` (`3`) | ||
1. `DOMException.WRONG_DOCUMENT_ERR` (`4`) | ||
1. `DOMException.INVALID_CHARACTER_ERR` (`5`) | ||
1. `DOMException.NO_DATA_ALLOWED_ERR` (`6`) | ||
1. `DOMException.NO_MODIFICATION_ALLOWED_ERR` (`7`) | ||
1. `DOMException.NOT_FOUND_ERR` (`8`) | ||
1. `DOMException.NOT_SUPPORTED_ERR` (`9`) | ||
1. `DOMException.INUSE_ATTRIBUTE_ERR` (`10`) | ||
1. `DOMException.INVALID_STATE_ERR` (`11`) | ||
1. `DOMException.SYNTAX_ERR` (`12`) | ||
1. `DOMException.INVALID_MODIFICATION_ERR` (`13`) | ||
1. `DOMException.NAMESPACE_ERR` (`14`) | ||
1. `DOMException.INVALID_ACCESS_ERR` (`15`) | ||
extends the Error type thrown as part of DOM API. | ||
readonly class properties: | ||
- `INDEX_SIZE_ERR` (`1`) | ||
- `DOMSTRING_SIZE_ERR` (`2`) | ||
- `HIERARCHY_REQUEST_ERR` (`3`) | ||
- `WRONG_DOCUMENT_ERR` (`4`) | ||
- `INVALID_CHARACTER_ERR` (`5`) | ||
- `NO_DATA_ALLOWED_ERR` (`6`) | ||
- `NO_MODIFICATION_ALLOWED_ERR` (`7`) | ||
- `NOT_FOUND_ERR` (`8`) | ||
- `NOT_SUPPORTED_ERR` (`9`) | ||
- `INUSE_ATTRIBUTE_ERR` (`10`) | ||
- `INVALID_STATE_ERR` (`11`) | ||
- `SYNTAX_ERR` (`12`) | ||
- `INVALID_MODIFICATION_ERR` (`13`) | ||
- `NAMESPACE_ERR` (`14`) | ||
- `INVALID_ACCESS_ERR` (`15`) | ||
The DOMException object has the following properties: | ||
code | ||
This property is of type Number. | ||
attributes: | ||
- `code` with a value matching one of the above constants. | ||
* extends the Error type thrown as part of DOM API: | ||
* [DOMImplementation](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-102161490) | ||
* [DOMImplementation](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-102161490) | ||
method: | ||
hasFeature(feature, version) | ||
createDocumentType(qualifiedName, publicId, systemId) | ||
createDocument(namespaceURI, qualifiedName, doctype) | ||
method: | ||
- `hasFeature(feature, version)` | ||
- `createDocumentType(qualifiedName, publicId, systemId)` | ||
- `createDocument(namespaceURI, qualifiedName, doctype)` | ||
* [Document](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#i-Document) : Node | ||
* [Document](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#i-Document) : Node | ||
readonly attribute: | ||
doctype|implementation|documentElement | ||
method: | ||
createElement(tagName) | ||
createDocumentFragment() | ||
createTextNode(data) | ||
createComment(data) | ||
createCDATASection(data) | ||
createProcessingInstruction(target, data) | ||
createAttribute(name) | ||
createEntityReference(name) | ||
getElementsByTagName(tagname) | ||
importNode(importedNode, deep) | ||
createElementNS(namespaceURI, qualifiedName) | ||
createAttributeNS(namespaceURI, qualifiedName) | ||
getElementsByTagNameNS(namespaceURI, localName) | ||
getElementById(elementId) | ||
readonly attribute: | ||
- `doctype` | `implementation` | `documentElement` | ||
method: | ||
- `createElement(tagName)` | ||
- `createDocumentFragment()` | ||
- `createTextNode(data)` | ||
- `createComment(data)` | ||
- `createCDATASection(data)` | ||
- `createProcessingInstruction(target, data)` | ||
- `createAttribute(name)` | ||
- `createEntityReference(name)` | ||
- `getElementsByTagName(tagname)` | ||
- `importNode(importedNode, deep)` | ||
- `createElementNS(namespaceURI, qualifiedName)` | ||
- `createAttributeNS(namespaceURI, qualifiedName)` | ||
- `getElementsByTagNameNS(namespaceURI, localName)` | ||
- `getElementById(elementId)` | ||
* [DocumentFragment](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-B63ED1A3) : Node | ||
* [Element](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-745549614) : Node | ||
* [DocumentFragment](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-B63ED1A3) : Node | ||
* [Element](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-745549614) : Node | ||
readonly attribute: | ||
tagName | ||
method: | ||
getAttribute(name) | ||
setAttribute(name, value) | ||
removeAttribute(name) | ||
getAttributeNode(name) | ||
setAttributeNode(newAttr) | ||
removeAttributeNode(oldAttr) | ||
getElementsByTagName(name) | ||
getAttributeNS(namespaceURI, localName) | ||
setAttributeNS(namespaceURI, qualifiedName, value) | ||
removeAttributeNS(namespaceURI, localName) | ||
getAttributeNodeNS(namespaceURI, localName) | ||
setAttributeNodeNS(newAttr) | ||
getElementsByTagNameNS(namespaceURI, localName) | ||
hasAttribute(name) | ||
hasAttributeNS(namespaceURI, localName) | ||
readonly attribute: | ||
- `tagName` | ||
* [Attr](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-637646024) : Node | ||
attribute: | ||
value | ||
readonly attribute: | ||
name|specified|ownerElement | ||
method: | ||
- `getAttribute(name)` | ||
- `setAttribute(name, value)` | ||
- `removeAttribute(name)` | ||
- `getAttributeNode(name)` | ||
- `setAttributeNode(newAttr)` | ||
- `removeAttributeNode(oldAttr)` | ||
- `getElementsByTagName(name)` | ||
- `getAttributeNS(namespaceURI, localName)` | ||
- `setAttributeNS(namespaceURI, qualifiedName, value)` | ||
- `removeAttributeNS(namespaceURI, localName)` | ||
- `getAttributeNodeNS(namespaceURI, localName)` | ||
- `setAttributeNodeNS(newAttr)` | ||
- `getElementsByTagNameNS(namespaceURI, localName)` | ||
- `hasAttribute(name)` | ||
- `hasAttributeNS(namespaceURI, localName)` | ||
* [NodeList](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-536297177) | ||
* [Attr](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-637646024) : Node | ||
attribute: | ||
- `value` | ||
readonly attribute: | ||
- `name` | `specified` | `ownerElement` | ||
* [NodeList](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-536297177) | ||
readonly attribute: | ||
length | ||
method: | ||
item(index) | ||
readonly attribute: | ||
- `length` | ||
method: | ||
- `item(index)` | ||
* [NamedNodeMap](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1780488922) | ||
* [NamedNodeMap](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1780488922) | ||
readonly attribute: | ||
length | ||
method: | ||
getNamedItem(name) | ||
setNamedItem(arg) | ||
removeNamedItem(name) | ||
item(index) | ||
getNamedItemNS(namespaceURI, localName) | ||
setNamedItemNS(arg) | ||
removeNamedItemNS(namespaceURI, localName) | ||
readonly attribute: | ||
- `length` | ||
method: | ||
- `getNamedItem(name)` | ||
- `setNamedItem(arg)` | ||
- `removeNamedItem(name)` | ||
- `item(index)` | ||
- `getNamedItemNS(namespaceURI, localName)` | ||
- `setNamedItemNS(arg)` | ||
- `removeNamedItemNS(namespaceURI, localName)` | ||
* [CharacterData](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-FF21A306) : Node | ||
* [CharacterData](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-FF21A306) : Node | ||
method: | ||
substringData(offset, count) | ||
appendData(arg) | ||
insertData(offset, arg) | ||
deleteData(offset, count) | ||
replaceData(offset, count, arg) | ||
method: | ||
- `substringData(offset, count)` | ||
- `appendData(arg)` | ||
- `insertData(offset, arg)` | ||
- `deleteData(offset, count)` | ||
- `replaceData(offset, count, arg)` | ||
* [Text](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1312295772) : CharacterData | ||
method: | ||
splitText(offset) | ||
* [Text](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1312295772) : CharacterData | ||
method: | ||
- `splitText(offset)` | ||
* [CDATASection](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-667469212) | ||
* [Comment](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1728279322) : CharacterData | ||
* [CDATASection](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-667469212) | ||
* [Comment](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1728279322) : CharacterData | ||
* [DocumentType](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-412266927) | ||
* [DocumentType](http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-412266927) | ||
readonly attribute: | ||
name|entities|notations|publicId|systemId|internalSubset | ||
readonly attribute: | ||
- `name` | `entities` | `notations` | `publicId` | `systemId` | `internalSubset` | ||
* Notation : Node | ||
* Notation : Node | ||
readonly attribute: | ||
publicId|systemId | ||
readonly attribute: | ||
- `publicId` | `systemId` | ||
* Entity : Node | ||
* Entity : Node | ||
readonly attribute: | ||
publicId|systemId|notationName | ||
readonly attribute: | ||
- `publicId` | `systemId` | `notationName` | ||
* EntityReference : Node | ||
* ProcessingInstruction : Node | ||
attribute: | ||
data | ||
readonly attribute: | ||
target | ||
* EntityReference : Node | ||
* ProcessingInstruction : Node | ||
attribute: | ||
- `data` | ||
readonly attribute: | ||
- `target` | ||
### DOM level 3 support: | ||
* [Node](http://www.w3.org/TR/DOM-Level-3-Core/core.html#Node3-textContent) | ||
* [Node](http://www.w3.org/TR/DOM-Level-3-Core/core.html#Node3-textContent) | ||
attribute: | ||
textContent | ||
method: | ||
isDefaultNamespace(namespaceURI){ | ||
lookupNamespaceURI(prefix) | ||
attribute: | ||
- `textContent` | ||
method: | ||
- `isDefaultNamespace(namespaceURI)` | ||
- `lookupNamespaceURI(prefix)` | ||
@@ -279,7 +299,5 @@ ### DOM extension by xmldom | ||
attribute: | ||
//Numbered starting from '1' | ||
lineNumber | ||
//Numbered starting from '1' | ||
columnNumber | ||
attribute: | ||
- `lineNumber` //number starting from `1` | ||
- `columnNumber` //number starting from `1` | ||
@@ -342,2 +360,2 @@ ## Specs | ||
There is an idea/proposal to make ti possible to replace it with something else in <https://github.com/xmldom/xmldom/issues/55> | ||
There is an idea/proposal to make it possible to replace it with something else in <https://github.com/xmldom/xmldom/issues/55> |
{ | ||
"name": "pdf2json", | ||
"version": "2.0.1", | ||
"version": "2.1.0", | ||
"description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", | ||
@@ -8,2 +8,3 @@ "keywords": [ | ||
"pdf parser", | ||
"pdf2json", | ||
"convert pdf to json", | ||
@@ -38,4 +39,5 @@ "server side PDF parser", | ||
"parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", | ||
"parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", | ||
"parse-242": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", | ||
"parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -t -c -m -r", | ||
"parse-fd": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/ -o ./test/target/fd/form -t -c -m -r", | ||
"parse-tb": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", | ||
"parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", | ||
@@ -49,2 +51,3 @@ "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", | ||
}, | ||
"type": "module", | ||
"bin": { | ||
@@ -54,3 +57,3 @@ "pdf2json": "./bin/pdf2json" | ||
"dependencies": { | ||
"@xmldom/xmldom": "^0.7.5" | ||
"@xmldom/xmldom": "^0.8.2" | ||
}, | ||
@@ -57,0 +60,0 @@ "devDependencies": {}, |
@@ -1,2 +0,2 @@ | ||
const P2JCMD = require('./lib/p2jcmd'); | ||
new P2JCMD().start(); | ||
import PDFCLI from "./lib/p2jcmd.js"; | ||
new PDFCLI().start(); |
@@ -1,11 +0,11 @@ | ||
const fs = require("fs"), | ||
{ readFile } = require("fs/promises"), | ||
{EventEmitter} = require("events"), | ||
nodeUtil = require("util"), | ||
PDFJS = require("./lib/pdf"), | ||
{ParserStream} = require("./lib/parserstream"), | ||
{kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst"); | ||
import fs from "fs"; | ||
import nodeUtil from "util"; | ||
import { readFile } from "fs/promises"; | ||
import { EventEmitter } from "events"; | ||
import PDFJS from "./lib/pdf.js"; | ||
import {ParserStream} from "./lib/parserstream.js"; | ||
import {kColors, kFontFaces, kFontStyles} from "./lib/pdfconst.js"; | ||
class PDFParser extends EventEmitter { // inherit from event emitter | ||
export default class PDFParser extends EventEmitter { // inherit from event emitter | ||
//public static | ||
@@ -165,4 +165,1 @@ static get colorDict() {return kColors; } | ||
} | ||
module.exports = PDFParser; | ||
# pdf2json | ||
pdf2json is a [node.js](http://nodejs.org/) module that parses and converts PDF from binary to json format, it's built with [pdf.js](https://github.com/mozilla/pdf.js/) and extends it with interactive form elements and text content parsing outside browser. | ||
pdf2json is a [node.js](http://nodejs.org/) module that parses and converts PDF from binary to json format, it's built with [pdf.js](https://github.com/mozilla/pdf.js/) and extends it with interactive form elements and text content parsing outside browser. | ||
@@ -53,4 +53,4 @@ The goal is to enable server side PDF parsing with interactive form elements when wrapped in web service, and also enable parsing local PDF to json file when using as a command line utility. | ||
````javascript | ||
const fs = require('fs'), | ||
PDFParser = require("pdf2json"); | ||
import fs from "fs"; | ||
import PDFParser from "./pdfparser.js"; | ||
@@ -88,4 +88,4 @@ const pdfParser = new PDFParser(); | ||
````javascript | ||
const fs = require('fs'), | ||
PDFParser = require("pdf2json"); | ||
import fs from "fs"; | ||
import PDFParser from "./pdfparser.js"; | ||
@@ -105,4 +105,4 @@ const pdfParser = new PDFParser(this,1); | ||
````javascript | ||
const fs = require('fs'), | ||
PDFParser = require("pdf2json"); | ||
import fs from "fs"; | ||
import PDFParser from "./pdfparser.js"; | ||
@@ -122,4 +122,4 @@ const pdfParser = new PDFParser(); | ||
````javascript | ||
const fs = require('fs'), | ||
PDFParser = require("pdf2json"); | ||
import fs from "fs"; | ||
import PDFParser from "./pdfparser.js"; | ||
@@ -139,17 +139,39 @@ const inputStream = fs.createReadStream("./pdf2json/test/pdf/fd/form/F1040EZ.pdf", {bufferSize: 64 * 1024}); | ||
````javascript | ||
#generateMergedTextBlocksStream(callback) { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), callback); | ||
this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); | ||
//private methods | ||
#generateMergedTextBlocksStream() { | ||
return new Promise( (resolve, reject) => { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), resolve, reject); | ||
this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); | ||
}); | ||
} | ||
#generateRawTextContentStream(callback) { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), callback); | ||
this.pdfParser.getRawTextContentStream().pipe(outputStream); | ||
#generateRawTextContentStream() { | ||
return new Promise( (resolve, reject) => { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), resolve, reject); | ||
this.pdfParser.getRawTextContentStream().pipe(outputStream); | ||
}); | ||
} | ||
#generateFieldsTypesStream(callback) { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), callback); | ||
this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); | ||
#generateFieldsTypesStream() { | ||
return new Promise( (resolve, reject) => { | ||
const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), resolve, reject); | ||
this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); | ||
}); | ||
} | ||
#processAdditionalStreams() { | ||
const outputTasks = []; | ||
if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file | ||
outputTasks.push(this.#generateFieldsTypesStream()); | ||
} | ||
if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file | ||
outputTasks.push(this.#generateRawTextContentStream()); | ||
} | ||
if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks | ||
outputTasks.push(this.#generateMergedTextBlocksStream()); | ||
} | ||
return Promise.allSettled(outputTasks); | ||
} | ||
```` | ||
Note: if primary JSON parsing has exceptions, none of the additional streams will be processed. | ||
See [p2jcmd.js](https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js) for more details. | ||
@@ -388,3 +410,3 @@ | ||
````javascript | ||
const {kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst"); | ||
import {kColors, kFontFaces, kFontStyles} from "./lib/pdfconst.js"; | ||
```` | ||
@@ -391,0 +413,0 @@ or via public static getters of PDFParser: |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
1977024
68
47360
1007
Yes
1
7
Updated@xmldom/xmldom@^0.8.2