pdf-powerpoint
Advanced tools
Comparing version 0.1.5 to 1.0.0
'use strict' | ||
var EventEmitter2 = require('eventemitter2').EventEmitter2 | ||
var exec = require('child_process').exec | ||
var fs = require('fs') | ||
@@ -8,4 +9,4 @@ var os = require('os') | ||
const _ = require('lodash') | ||
var officegen = require('officegen') | ||
var PDF2Images = require('pdf2images-multiple') | ||
var Promise = require('bluebird') | ||
@@ -16,3 +17,2 @@ var rmdir = require('rimraf') | ||
var debug = require('debug') | ||
var errorLogger = debug('pdfppt:error') | ||
var pdfLogger = debug('pdfppt:app') | ||
@@ -23,2 +23,5 @@ var debugLogger = debug('pdfppt:debug') | ||
/** allows the caller to provide a path to the GhostScript Executable */ | ||
const GS_PATH = process.env.PDF_PPT_GSPATH | ||
/** Going above 300 has a significant impact on performance | ||
@@ -32,3 +35,11 @@ * without much noticeable quality improvement */ | ||
constructor () { | ||
/** | ||
* | ||
* @param options | ||
* @param options.clean {boolean=true} set to false if intermediate image | ||
* files should be left on the filesystem. | ||
* @param options.jobId {string} if provided, this will be included in any logging output | ||
* | ||
*/ | ||
constructor (options) { | ||
super({ | ||
@@ -38,2 +49,5 @@ wildcard: true, // Allow clients to listen based on wildcard expressions | ||
}) | ||
this.options = options || {} | ||
this.clean = this.options.clean || true | ||
this.jobId = this.options.jobId || '' | ||
} | ||
@@ -43,17 +57,16 @@ | ||
* | ||
* @param pdfFile | ||
* @param pdfFiles {array|string} An array of PDF files that should be | ||
* converted | ||
* @param options | ||
* @param options.stagingDir - A directory where intermediate PNG images will | ||
* be placed when converting into slides. If a file with the same name | ||
* exists ImageMagick will not process that image again. Therefore, it is | ||
* recommended that a different folder be used for each conversion. If | ||
* undefined, a random directory will be created under the systems temp | ||
* directory. It will be deleted once the job has completed. | ||
* @param options.convertOptions - ImageMagick conversion options (minus the -) | ||
* Currently supported: density(<300) | ||
* be placed when converting into slides. A different folder should be | ||
* used for each conversion. If undefined, a random directory will be | ||
* created under the systems temp directory. It will be deleted once the | ||
* job has completed. | ||
* | ||
* @param options.convertOptions - ImageMagick conversion options (minus the | ||
* -) Currently supported: density(<300) | ||
* @param done | ||
*/ | ||
convertPDFToPowerpoint (pdfFile, options, done) { | ||
pdfLogger('converting', pdfFile) | ||
convertPDFToPowerpoint (pdfFiles, options, done) { | ||
let callback | ||
@@ -70,62 +83,53 @@ let opts = {} | ||
pdfLogger('options:', opts) | ||
// pdfLogger('options:', opts) | ||
const stagingDir = this._getStagingDirectory(opts.stagingDir) | ||
stagingDir.then((outputDir) => { | ||
var imgMagickOpts = this._getImageMagickOptions(this._getConvertOptions(opts), outputDir) | ||
var pdf2images = PDF2Images(pdfFile, imgMagickOpts) | ||
this._convert(pdf2images, outputDir, callback) | ||
this._convertWithGhostScript(outputDir, pdfFiles, options, callback) | ||
}, (err) => callback(err)) | ||
} | ||
_getConvertOptions (opts) { | ||
const convertOpts = opts.convertOptions || {} | ||
pdfLogger('pptx conversion options:', convertOpts) | ||
const convertOptions = { | ||
'-density': convertOpts.density | ||
? Math.min(convertOpts.density, DPI_DENSITY_MAX) | ||
: DPI_DENSITY_DEFAULT, | ||
// '-resize' : '800x600', | ||
// '-trim': '', | ||
// '-sharpen' : '0x1.0' | ||
'-quality': 100 | ||
} | ||
return convertOptions | ||
} | ||
/** | ||
* GhostScript can be invoked directly, since ImageMagick just delegates to it | ||
* | ||
* @param outputDir | ||
* @param pdfFiles | ||
* @param options | ||
* @param callback | ||
* @private | ||
*/ | ||
_convertWithGhostScript (outputDir, pdfFiles, options, callback) { | ||
var start = this.nowInMillis() | ||
_getImageMagickOptions (convertOptions, outputDir) { | ||
pdfLogger('Using staging directory:', outputDir) | ||
const options = { | ||
convert_options: convertOptions, // optional | ||
// convert_operators: convert_operators, //optional | ||
output_dir: path.resolve(outputDir, 'img'), // optional | ||
ext: 'png', // optional, png is the default value | ||
gm: false // Use GraphicksMagic //optional, false is the default value | ||
} | ||
pdfLogger('ImageMagick Options:', options) | ||
return options | ||
} | ||
const gsExecutable = os.platform() === 'win32' ? GS_PATH || 'gswin32c.exe' : 'gs' | ||
const imgDir = path.resolve(outputDir, 'img') | ||
const co = this._getConvertOptions(options) | ||
const gsCmdRoot = `"${gsExecutable}" -q -dQUIET -dSAFER -sDEVICE=pngalpha -dMaxBitmap=500000000 -r${co.density} -dUseArtBox` | ||
_convert (converter, outputDir, done) { | ||
pdfLogger('converting pdf into images...') | ||
var self = this | ||
var start = this.nowInMillis() | ||
var imgStart = start | ||
converter.pdf.convert((err, imagePath) => { | ||
if (err) { | ||
self.emit('err.png.single', {output: imagePath, error: err, time: this.elapsed(imgStart)}) | ||
errorLogger(err) | ||
// Get the image files for each PDF | ||
let gsErr = [] | ||
let requests = pdfFiles.map((pdfPath, pdfIndex) => { | ||
return new Promise((resolve) => { | ||
const gsCmd = gsCmdRoot + ` -o ${imgDir}/img-${pdfIndex}-%d.png ${pdfPath}` | ||
exec(gsCmd, (err, stdout, stderr) => { | ||
this.emit('done.gs.convert', { output: gsCmd, time: this.elapsed(start), error: err }) | ||
if (err) { | ||
gsErr.push(err) | ||
} | ||
resolve() | ||
}) | ||
}) | ||
}) | ||
// GS executes each PDF asynchronously, so we need a collection of promises | ||
// to wait for all image files to be present | ||
Promise.all(requests).then(() => { | ||
if (!_.isEmpty(gsErr)) { | ||
this.emit('err.png.all', {error: gsErr, time: this.elapsed(start)}) | ||
} else { | ||
self.emit('done.png.single', {output: imagePath, time: this.elapsed(imgStart)}) | ||
} | ||
imgStart = this.nowInMillis() | ||
// Will it be faster to add images here? | ||
}, (err, images) => { | ||
if (err) { | ||
done(err) | ||
self.emit('err.png.all', {output: images, error: err, time: this.elapsed(start)}) | ||
} else { | ||
self.emit('done.png.all', {output: images, time: this.elapsed(start)}) | ||
const images = fs.readdirSync(imgDir).map(f => `${imgDir}/${f}`) | ||
this.emit('done.png.all', {output: images, time: this.elapsed(start)}) | ||
var pptxOutput = path.resolve(outputDir, `output_${process.hrtime()[1]}.pptx`) | ||
this._aggregateSlides(images, pptxOutput, done, outputDir) | ||
this._aggregateSlides(images, pptxOutput, imgDir, callback) | ||
} | ||
@@ -135,8 +139,33 @@ }) | ||
_aggregateSlides (images, pptxOutput, done, outputDir) { | ||
_getConvertOptions (options) { | ||
const co = options.convertOptions || {} | ||
const o = {} | ||
/* Note: if the density is too low and there is a slide with a transparent background, | ||
The image may show a horizontal line on the slide when it is rendered in the PPTX. | ||
(was visible at 72, but not visible at 150) | ||
*/ | ||
o.density = co.density ? Math.min(co.density, DPI_DENSITY_MAX) : DPI_DENSITY_DEFAULT | ||
return o | ||
} | ||
/** | ||
* | ||
* @param {array} images | ||
* @param pptxOutput pptx file path | ||
* @param imgDir the directory where png files are generated | ||
* @param done callback | ||
* @private | ||
*/ | ||
_aggregateSlides (images, pptxOutput, imgDir, done) { | ||
this._createSlides(images, pptxOutput, (slideErr, output) => { | ||
pdfLogger('Finished rendering all slides') | ||
done(slideErr, output) | ||
rmdir(path.resolve(outputDir, 'img'), | ||
(e) => e && errorLogger('Could not delete working directory:', e)) | ||
if (this.clean) { | ||
var start = this.nowInMillis() | ||
rmdir(imgDir, (err) => { | ||
if (err) { | ||
this.emit('done.png.clean', {output: imgDir, time: this.elapsed(start), error: err}) | ||
pdfLogger(this.jobId, 'Could not delete working directory:', imgDir, err) | ||
} | ||
}) | ||
} | ||
}) | ||
@@ -146,3 +175,2 @@ } | ||
_createSlides (imageFiles, pptFile, done) { | ||
pdfLogger('Adding images to slides') | ||
var start = this.nowInMillis() | ||
@@ -161,3 +189,3 @@ var pptx = officegen('pptx') | ||
}) | ||
debugLogger('Sorted Images:', sortedImages) | ||
debugLogger(this.jobId, 'Sorted Images:', sortedImages) | ||
@@ -172,6 +200,6 @@ // TODO: Need a callback here if this blocks too long | ||
_savePresentationFile (pptFile, done, pptx) { | ||
var start = this.nowInMillis() | ||
var out = fs.createWriteStream(pptFile) | ||
out.on('close', () => { | ||
pdfLogger('Created the PPTX file:', pptFile) | ||
this.emit('done.pptx.saved', {output: pptFile}) | ||
this.emit('done.pptx.saved', {output: pptFile, time: this.elapsed(start)}) | ||
done(null, pptFile) | ||
@@ -187,3 +215,3 @@ }) | ||
if (err || !s.exists()) { | ||
pdfLogger('staging directory:', stagingDir, 'does not exist, creating a new one') | ||
pdfLogger(this.jobId, 'staging directory:', stagingDir, 'does not exist, creating a new one') | ||
return this._createTempStagingDirectory() | ||
@@ -218,5 +246,4 @@ } | ||
} | ||
} | ||
module.exports = Powerpoint |
{ | ||
"name": "pdf-powerpoint", | ||
"version": "0.1.5", | ||
"version": "1.0.0", | ||
"description": "Converts one or more PDFs into a powerpoint with one pdf page per slide", | ||
@@ -9,3 +9,4 @@ "main": "index.js", | ||
"fix": "standard --fix", | ||
"test": "standard && DEBUG='app:*' ava | tap-diff" | ||
"test": "standard && DEBUG='app:*' ava **/*-test.js | tap-diff", | ||
"testWin": "standard && ava **/*-test.js | tap-diff" | ||
}, | ||
@@ -41,5 +42,4 @@ "repository": { | ||
"officegen": "^0.4.0", | ||
"pdf2images-multiple": "^1.0.16", | ||
"rimraf": "^2.5.4" | ||
} | ||
} |
@@ -10,3 +10,3 @@ [![Build Status](https://travis-ci.org/SpiderStrategies/pdf-powerpoint.svg?branch=master)](https://travis-ci.org/SpiderStrategies/pdf-powerpoint) | ||
- ImageMagick is used to transform each page of a PDF into a PNG image. | ||
- GhostScript is used to transform each page of a PDF into a PNG image. | ||
- Each single image is added to a slide in the powerpoint presentation. | ||
@@ -18,4 +18,4 @@ - Slides are in the order of the PDFs passed in the array | ||
**Required packages:** | ||
- Debian: `apt-get install -y imagemagick ghostscript poppler-utils GraphicsMagick` | ||
- OSX: `brew install imagemagick poppler` | ||
- Debian: `apt-get install -y ghostscript` | ||
- OSX: `brew install ghostscript` | ||
@@ -39,3 +39,3 @@ ### Usage | ||
- If not provided the default is to use `fs.mkdtemp('${os.tmpdir()}/pdf_ppt_')` to generate a random temp directory | ||
* `convertOptions` - These are used for ImageMagick conversion | ||
* `convertOptions` - These are used for Ghostscript conversion | ||
- `density` - specifies the PPI setting for the output image | ||
@@ -52,3 +52,3 @@ - default: 72, maximum value allowed is 300 | ||
Events emit an object that may have the following properties: | ||
* `error` - if an error occured | ||
* `error` - if an error occurred | ||
* `time` - if the event marks the end of a corresponding start event | ||
@@ -59,6 +59,6 @@ * `output` - If there is a PNG or PPTX file generated from the event | ||
1. `err.png.single` | ||
1. `done.png.single` - `output` is the path to the png file | ||
1. `done.gs.convert`- `output` is the GhostScript command that was executed | ||
1. `err.png.all` | ||
1. `done.png.all` - `output` is an array of paths to images generated from PDF | ||
1. `done.png.clean` - `output` is the image directory that was deleted | ||
1. `done.pptx.creation` - powerpoint is complete in memory, all images added to slides | ||
@@ -71,6 +71,27 @@ 1. `done.pptx.saved` - `output` is the pptFile | ||
* pdfppt:error | ||
* pdfppt:app | ||
* pdfppt:debug | ||
This can be turned on by setting `DEBUG=pdfppt:*`, read more about [Debug here](https://www.npmjs.com/package/debug) | ||
This can be turned on by setting `DEBUG=pdfppt:*`, read more about [Debug here](https://www.npmjs.com/package/debug) | ||
### Implementation | ||
#### ImageMagick delegates to GhostScript for PDF -> PNG conversion | ||
- ImageMagick: `convert -density 72 -quality 100 -verbose '/var/folders/dr/f1q4znd96xv8wp82y4cfgg700000gn/T/833198680xmyTzU/output.pdf[4]' '/var/folders/dr/f1q4znd96xv8wp82y4cfgg700000gn/T/pdf_ppt_Tl9eSm/img/output-4.png'` | ||
- GhostScript: `'gs' -q -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dAlignToPixels=0 -dGridFitTT=2 '-sDEVICE=pngalpha' -dTextAlphaBits=4 -dGraphicsAlphaBits=4 '-r72x72' -dFirstPage=5 -dLastPage=5 '-sOutputFile=/var/tmp/magick-94224ozuZS3iFphAj%d' '-f/var/tmp/magick-94224zWXBFMw8ZiEA' '-f/var/tmp/magick-9422413LS3T1dhoL4'` | ||
#### So GhostScript is used directly | ||
*Note:* You must ensure that GhostScript is installed on your system; it is not included with this package. | ||
If you have GhostScript installed globally on your system it should be located automatically, but if not you can provide the path to the GhostScript executable by setting the `PDF_PPT_GSPATH` environment variable. | ||
[GhostScript Option Documentation](https://ghostscript.com/doc/current/Use.htm) | ||
The following command is generated: `gs -q -dQUIET -sDEVICE=pngalpha -r150 -o outputFile-%d.png` | ||
- As a convenient shorthand you can use the `-o` option followed by the output file specification as discussed above. The `-o` option also sets the `-dBATCH` and `-dNOPAUSE` options. | ||
- `-q` Quiet startup: suppress normal startup messages, and also do the equivalent of -dQUIET. | ||
- `-dQUIET` Suppresses routine information comments on standard output. | ||
- `-sDEVICE=pngalpha` | ||
- `-r[XResxYRes]` Useful for controlling the density of pixels when rasterizing to an image file. It is the requested number of dots (or pixels) per inch. Where the two resolutions are the same, as is the common case, you can simply use `-rres`. |
@@ -6,3 +6,4 @@ import {test} from 'ava' | ||
test.cb('image is resized to fit on slide', t => { | ||
// Need to set GS Path for this to work | ||
test.skip.cb('image is resized to fit on slide', t => { | ||
const options = {} | ||
@@ -20,3 +21,3 @@ p.convertPDFToPowerpoint('google-l.pdf', options, (err, result) => { | ||
const o = p._getConvertOptions({ convertOptions: { 'density': 600 } }) | ||
t.is(o['-density'], 300) | ||
t.is(o['density'], 300) | ||
}) | ||
@@ -26,3 +27,3 @@ | ||
const o = p._getConvertOptions({}) | ||
t.is(o['-density'], 72) | ||
t.is(o['density'], 72) | ||
}) |
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
99607
5
233
0
92
2
1
- Removed pdf2images-multiple@^1.0.16
- Removed assertion-error@1.0.0 (transitive)
- Removed chai@1.9.2 (transitive)
- Removed deep-eql@0.1.3 (transitive)
- Removed es6-promise@2.0.1 (transitive)
- Removed pdf-image@1.1.0 (transitive)
- Removed pdf2images-multiple@1.0.16 (transitive)
- Removed type-detect@0.1.1 (transitive)