Comparing version 0.5.6 to 0.5.7
@@ -27,2 +27,4 @@ 'use strict'; | ||
var _iChars = "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ "; | ||
var _continue = function(callback, err) { | ||
@@ -36,38 +38,36 @@ if (err) | ||
var _writeOneJSON = function(data, callback) { | ||
var self = this; | ||
var pJSON = JSON.stringify({"formImage":data}); | ||
fs.writeFile(self.outputPath, pJSON, function(err) { | ||
fs.writeFile(this.outputPath, pJSON, function(err) { | ||
if(err) { | ||
nodeUtil.p2jwarn("\n" + self.inputFile + " => " + self.outputFile + " Exception: " + err); | ||
self.curProcessor.failedCount++; | ||
nodeUtil.p2jwarn(this.inputFile + " => " + this.outputFile + " Exception: " + err); | ||
this.curProcessor.failedCount++; | ||
} else { | ||
nodeUtil.p2jinfo("\n" + self.inputFile + " => " + self.outputFile + " [" + self.outputDir + "] OK"); | ||
self.curProcessor.successCount++; | ||
nodeUtil.p2jinfo(this.inputFile + " => " + this.outputFile + " [" + this.outputDir + "] OK"); | ||
this.curProcessor.successCount++; | ||
} | ||
_continue.call(self, callback, err); | ||
}); | ||
_continue.call(this, callback, err); | ||
}.bind(this)); | ||
}; | ||
var _parseOnePDF = function(callback) { | ||
var self = this; | ||
self.pdfParser = new PFParser(); | ||
this.pdfParser = new PFParser(); | ||
self.pdfParser.on("pdfParser_dataReady", function (evtData) { | ||
this.pdfParser.on("pdfParser_dataReady", function (evtData) { | ||
if ((!!evtData) && (!!evtData.data)) { | ||
_writeOneJSON.call(self, evtData.data, callback); | ||
_writeOneJSON.call(this, evtData.data, callback); | ||
} | ||
else { | ||
self.curProcessor.failedCount++; | ||
_continue.call(self, callback, "Exception: empty parsing result - " + self.inputPath); | ||
this.curProcessor.failedCount++; | ||
_continue.call(this, callback, "Exception: empty parsing result - " + this.inputPath); | ||
} | ||
}); | ||
}.bind(this)); | ||
self.pdfParser.on("pdfParser_dataError", function (evtData) { | ||
self.curProcessor.failedCount++; | ||
this.pdfParser.on("pdfParser_dataError", function (evtData) { | ||
this.curProcessor.failedCount++; | ||
var errMsg = "Exception: " + evtData.data; | ||
_continue.call(self, callback, errMsg); | ||
}); | ||
_continue.call(this, callback, errMsg); | ||
}.bind(this)); | ||
self.pdfParser.loadPDF(self.inputPath, (_.has(argv, 's') ? 0 : 5)); | ||
this.pdfParser.loadPDF(this.inputPath, (_.has(argv, 's') ? 0 : 5)); | ||
}; | ||
@@ -118,3 +118,3 @@ | ||
else { | ||
nodeUtil.p2jinfo("\nTranscoding " + this.inputFile + " to - " + this.outputPath); | ||
nodeUtil.p2jinfo("Transcoding " + this.inputFile + " to - " + this.outputPath); | ||
fs.closeSync(fod); | ||
@@ -147,3 +147,10 @@ fs.unlinkSync(this.outputPath); | ||
else { | ||
_parseOnePDF.call(this, callback); | ||
var inputFile = path.basename(this.inputFile.toLowerCase(), '.pdf'); | ||
if (!inputFile || inputFile.length < 1 || _iChars.indexOf(inputFile.substr(0,1)) >= 0) { | ||
nodeUtil.p2jinfo("Skipped PDF " + this.inputFile + " - " + "invalid filename."); | ||
_continue.call(this, callback); | ||
} | ||
else { | ||
_parseOnePDF.call(this, callback); | ||
} | ||
} | ||
@@ -221,8 +228,7 @@ }; | ||
var self = this; | ||
process.nextTick( function() { | ||
console.timeEnd(_PRO_TIMER); | ||
var exitCode = (self.inputCount === self.successCount) ? 0 : 1; | ||
var exitCode = (this.inputCount === this.successCount) ? 0 : 1; | ||
process.exit(exitCode); | ||
}); | ||
}.bind(this)); | ||
}; | ||
@@ -240,30 +246,27 @@ | ||
cls.prototype.processFiles = function(inputDir, files) { | ||
var self = this; | ||
var fId = 0; | ||
self.p2j = new PDF2JSONUtil(inputDir, files[fId], self); | ||
self.p2j.processFile( function processPDFFile(err) { | ||
this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); | ||
this.p2j.processFile( function processPDFFile(err) { | ||
if (err) { | ||
self.complete(err); | ||
this.complete(err); | ||
} | ||
else { | ||
fId++; | ||
if (fId >= self.inputCount) { | ||
self.complete(null); | ||
if (fId >= this.inputCount) { | ||
this.complete(null); | ||
} | ||
else { | ||
if (self.p2j) { | ||
self.p2j.destroy(); | ||
self.p2j = null; | ||
if (this.p2j) { | ||
this.p2j.destroy(); | ||
this.p2j = null; | ||
} | ||
self.p2j = new PDF2JSONUtil(inputDir, files[fId], self); | ||
self.p2j.processFile(processPDFFile); | ||
this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); | ||
this.p2j.processFile(processPDFFile.bind(this)); | ||
} | ||
} | ||
}); | ||
}.bind(this)); | ||
}; | ||
cls.prototype.processOneDirectory = function () { | ||
var self = this; | ||
var inputDir = path.normalize(argv.f); | ||
@@ -276,11 +279,11 @@ | ||
self.inputCount = pdfFiles.length; | ||
if (self.inputCount > 0) { | ||
self.processFiles(inputDir, pdfFiles); | ||
this.inputCount = pdfFiles.length; | ||
if (this.inputCount > 0) { | ||
this.processFiles(inputDir, pdfFiles); | ||
} | ||
else { | ||
console.log("No PDF files found. [" + inputDir + "]."); | ||
self.complete(null); | ||
this.complete(null); | ||
} | ||
}); | ||
}.bind(this)); | ||
}; | ||
@@ -287,0 +290,0 @@ |
{ | ||
"name": "pdf2json", | ||
"_id": "pdf2json@0.5.6", | ||
"version": "0.5.6", | ||
"_id": "pdf2json@0.5.7", | ||
"version": "0.5.7", | ||
"description": "A PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", | ||
@@ -6,0 +6,0 @@ "keywords": [ |
@@ -632,2 +632,3 @@ Introduction | ||
v0.5.7 added the capability to skip input PDF files whose filename begins with any one of "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ "; these files are usually created by PDF authoring tools as backup files. | ||
@@ -634,0 +635,0 @@ Run in a RESTful Web Service |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
3264727
35895
644