Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

pdf-powerpoint

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

pdf-powerpoint - npm Package Compare versions

Comparing version 1.1.3 to 1.2.0

test/powerpoint-test-inkscape.js

297

lib/powerpoint.js

@@ -5,2 +5,3 @@ 'use strict'

var exec = require('child_process').exec
var spawn = require('child_process').spawn
var fs = require('fs')

@@ -22,2 +23,6 @@ var os = require('os')

/** allows the caller to provide a path to the Inkscape Executable */
const INKSCAPE_PATH = process.env.PDF_PPT_INKSCAPE_PATH
/** allows the caller to provide a path to the MuPdf Executable */
const MUPDF_PATH = process.env.PDF_PPT_MUPDF_PATH
/** allows the caller to provide a path to the GhostScript Executable */

@@ -28,4 +33,6 @@ const GS_PATH = process.env.PDF_PPT_GSPATH

const gsExecutable = os.platform() === 'win32' ? GS_PATH || 'gswin32c.exe' : 'gs'
const imageMagickConvert = os.platform() === 'win32' ? IM_PATH || 'convert.exe' : 'convert'
let gsExecutable
let mupdfExecutable
let inkScapeExecutable
let imageMagickConvert

@@ -52,8 +59,12 @@ /** Going above 300 has a significant impact on performance

* files should be left on the filesystem.
* @param options.jobId {string} if provided, this will be included in any
* @param options.jobId {String} if provided, this will be included in any
* logging output
* @param {boolean} [options.cropLastImage=false] requires ImageMagick `convert` to be on the path. Will crop the last pdf image before placing on slide.
* @param {number} [options.dimensions.width=800] of slides in pixels
* @param {number} [options.dimensions.height=600] of slides in pixels
* @param {string} [options.dimensions.type=screen4x3] '35mm' 'A3' 'A4' 'B4ISO' 'B4JIS' 'B5ISO' 'B5JIS' 'banner' 'custom' 'hagakiCard' 'ledger' 'letter' 'overhead' 'screen16x10' 'screen16x9' 'screen4x3'
* @param {boolean} [options.cropLastImage=false] requires ImageMagick
* `convert` to be on the path. Will crop the last pdf image before
* placing on slide.
* @param {Number} [options.dimensions.width=800] of slides in pixels
* @param {Number} [options.dimensions.height=600] of slides in pixels
* @param {String} [options.dimensions.type=screen4x3] '35mm' 'A3' 'A4'
* 'B4ISO' 'B4JIS' 'B5ISO' 'B5JIS' 'banner' 'custom' 'hagakiCard' 'ledger'
* 'letter' 'overhead' 'screen16x10' 'screen16x9' 'screen4x3'
*

@@ -66,2 +77,3 @@ */

})
this._setExecutables()
this.options = _.merge({}, defaultOptions, options)

@@ -74,2 +86,27 @@ process.nextTick(() => {

/**
* Inspects any env property that may have been set, and changes the default
* based on operating system if not set.
* @private
*/
_setExecutables () {
const p = os.platform()
// Linux
gsExecutable = GS_PATH || 'gs'
mupdfExecutable = MUPDF_PATH || 'mudraw'
inkScapeExecutable = INKSCAPE_PATH || 'inkscape'
imageMagickConvert = IM_PATH || 'convert'
// Windows
if (p === 'win32') {
gsExecutable = GS_PATH || 'gswin32c.exe'
mupdfExecutable = MUPDF_PATH || 'mutool.exe draw'
imageMagickConvert = IM_PATH || 'convert.exe'
inkScapeExecutable = INKSCAPE_PATH || 'inkscape'
}
// Mac OS
if (p === 'darwin') {
mupdfExecutable = MUPDF_PATH || 'mutool draw'
}
}
/**
*

@@ -103,9 +140,143 @@ * @param pdfFiles {array|string} An array of PDF files that should be

const stagingDir = this._getStagingDirectory(opts.stagingDir)
stagingDir.then((outputDir) => {
this._convertWithGhostScript(outputDir, pdfFiles, options, callback)
}, (err) => callback(err))
this._getStagingDirectory(opts.stagingDir)
.then((outputDir) => {
this.imgDir = path.resolve(outputDir, 'img')
this.pdfDir = path.resolve(outputDir, 'pdf')
let convertPromise
if (this.options.engine === 'inkscape') {
convertPromise = this._convertWithInkscape(outputDir, pdfFiles, options)
} else if (this.options.engine === 'mupdf') {
convertPromise = this._convertWithMuPDF(outputDir, pdfFiles, options)
} else {
convertPromise = this._convertWithGhostScript(outputDir, pdfFiles, options)
}
convertPromise.then(sortedImages => {
this._createPowerpoint(outputDir, sortedImages, callback)
})
}, (err) => callback(err))
.catch(err => callback(err))
}
_convertWithInkscape (outputDir, pdfFiles, options) {
// Split the PDFs
const splitTasks = this._getSplitTasks(pdfFiles)
return Promise.all(splitTasks).then(() => {
return this._readPdfDirectory().then(singlePagePdfFiles => {
const sortedSinglePDFs = this._sortPages(singlePagePdfFiles)
return this._executeInkscape(sortedSinglePDFs, options)
})
})
}
_executeInkscape (sortedSinglePDFs, options) {
const sortedImages = []
const co = this._getConvertOptions(options)
const commands = _.map(sortedSinglePDFs, (pdfFile) => {
const pngFile = `${path.basename(pdfFile, '.pdf')}.png`
const pngPath = path.join(this.imgDir, pngFile)
sortedImages.push(pngPath)
return `-d ${co.density} --export-png=${pngPath} ${pdfFile}`
})
const inkTasks = this._getInkscapeExportTasks(commands)
const start = this.nowInMillis()
return Promise.all(inkTasks).then(() => {
this.emit('done.inkscape.export.all', {time: this.elapsed(start)})
return Promise.resolve(sortedImages)
})
// return this._spawnInkscapeShell(sortedImages, commands).then(() => sortedImages)
}
_getInkscapeExportTasks (commands) {
return commands.map((cmd) => {
return new Promise((resolve, reject) => {
const fullCmd = `${inkScapeExecutable} ${cmd}`
// Including timings here isn't useful because these promises run concurrently
exec(fullCmd, (err, stdout, stderr) => {
this.emit('done.inkscape.export', { output: fullCmd, error: err })
if (err) {
reject(err)
}
resolve()
})
})
})
}
/**
* Executes inkscape export commands in a single reusable shell.
* In theory this should be more efficient, but it runs serially and ends
* up taking a lot longer.
*
* The commands are written to the stdin of one `inkscape --shell` process,
* followed by `quit`. Progress is reported by watching stdout for
* `Bitmap saved as:` output.
*
* @param {Array} commands inkscape export commands
* @returns {Promise} resolved (with no value) when the process emits `exit`,
* rejected when it emits `error`. The exit code is not inspected.
* @private
*/
_spawnInkscapeShell (commands) {
return new Promise((resolve, reject) => {
const inkProc = spawn(inkScapeExecutable, ['--shell'])
// NOTE: the handlers below read inkCmdStart / inkExecStart, which are only
// declared further down with `let`. This relies on no event firing before
// the rest of this function body has run.
inkProc.stdout.on('data', d => {
// Each export shell command writes 3 lines to stdout, this is all we have to
// signal that a single conversion was completed. The `Bitmap saved` line is the
// most contextual (and last) line, so that is what is logged
const msg = d.toString()
if (_.startsWith(msg, 'Bitmap saved as:')) {
this.emit('done.inkscape.export',
{output: msg, time: this.elapsed(inkCmdStart)})
// Restart the per-command timer for the next export
inkCmdStart = this.nowInMillis()
}
})
inkProc.on('error', e => {
pdfLogger('Inkscape conversion failed:', e)
reject(e)
})
inkProc.on('exit', () => {
this.emit('done.inkscape.export.all', {time: this.elapsed(inkExecStart)})
resolve()
})
// Run all the conversions in the shell
// One command per line, with a trailing `quit` so the shell terminates itself
const inkShellCmd = _.join(commands, ' \n') + ' \nquit\n'
pdfLogger('Inkscape shell commands:', inkShellCmd)
// Both timers start here, i.e. after the process was spawned
let inkExecStart = this.nowInMillis()
let inkCmdStart = inkExecStart
inkProc.stdin.write(inkShellCmd)
})
}
/**
* Reads the contents of the staging directory that contains the PDF
* files after they are split using pdfseparate. This is because we have
* no idea how many files there will be, and they need to be sorted properly.
*
* @returns {Promise} fulfilled with list of filenames
*/
_readPdfDirectory () {
return new Promise((resolve, reject) => {
fs.readdir(this.pdfDir, (err, files) => {
if (err) {
reject(err)
}
resolve(files.map(f => `${this.pdfDir}/${f}`))
})
})
}
_getSplitTasks (pdfFiles) {
return pdfFiles.map((pdfPath, pdfIndex) => {
return new Promise((resolve, reject) => {
const splitCmd = `pdfseparate ${pdfPath} ${this.pdfDir}/pdf-${pdfIndex}-%d.pdf`
const splitStart = this.nowInMillis()
exec(splitCmd, (err, stdout, stderr) => {
this.emit('done.pdf.separate', { output: splitCmd, time: this.elapsed(splitStart), error: err })
if (err) {
reject(err)
}
resolve()
})
})
})
}
/**
* GhostScript can be invoked directly, since ImageMagick just delegates to it

@@ -116,9 +287,6 @@ *

* @param options
* @param callback
* @private
*/
_convertWithGhostScript (outputDir, pdfFiles, options, callback) {
_convertWithGhostScript (outputDir, pdfFiles, options) {
const start = this.nowInMillis()
const imgDir = path.resolve(outputDir, 'img')
const co = this._getConvertOptions(options)

@@ -131,6 +299,6 @@

let requests = pdfFiles.map((pdfPath, pdfIndex) => {
let tasks = pdfFiles.map((pdfPath, pdfIndex) => {
return new Promise((resolve) => {
const imgPrefix = `img-${pdfIndex}-`
const gsCmd = gsCmdRoot + ` -o ${imgDir}/${imgPrefix}%d.png ${pdfPath}`
const gsCmd = gsCmdRoot + ` -o ${this.imgDir}/${imgPrefix}%d.png ${pdfPath}`
const gsStart = this.nowInMillis()

@@ -147,20 +315,56 @@ exec(gsCmd, (err, stdout, stderr) => {

// GS executes each PDF asynchronously, so we need a collection of promises
// to wait for all image files to be present
Promise.all(requests).then(() => {
if (!_.isEmpty(gsErr)) {
this.emit('err.png.all', {error: gsErr, time: this.elapsed(start)})
return this.processImgConversionTasks(tasks, gsErr, start)
}
/**
* @param {Array} tasks list of promises
* @param {Array} errors
* @param {Number} startedAt timestamp of when conversion started
* @returns {Promise.<String[]>} sorted images ready for pptx slides
*/
processImgConversionTasks (tasks, errors, startedAt) {
return Promise.all(tasks).then(() => {
if (!_.isEmpty(errors)) {
this.emit('err.png.all', {error: errors, time: this.elapsed(startedAt)})
return Promise.reject(errors)
} else {
const imagesFiles = fs.readdirSync(imgDir).map(f => `${imgDir}/${f}`)
const sortedImages = this._sortImages(imagesFiles)
this.emit('done.png.all', {output: sortedImages, time: this.elapsed(start)})
const imagesFiles = fs.readdirSync(this.imgDir).map(f => `${this.imgDir}/${f}`)
const sortedImages = this._sortPages(imagesFiles)
this.emit('done.png.all', {output: sortedImages, time: this.elapsed(startedAt)})
return this._cropLastImages(sortedImages).then(() => sortedImages)
}
})
}
this._cropLastImages(sortedImages).then(() => {
var pptxOutput = path.resolve(outputDir, `output_${process.hrtime()[1]}.pptx`)
this._aggregateSlides(sortedImages, pptxOutput, imgDir, callback)
_convertWithMuPDF (outputDir, pdfFiles, options) {
const start = this.nowInMillis()
const co = this._getConvertOptions(options)
const cmdRoot = `${mupdfExecutable} -r ${co.density}`
// Get the image files for each PDF
let errors = []
let tasks = pdfFiles.map((pdfPath, pdfIndex) => {
return new Promise((resolve) => {
const imgPrefix = `img-${pdfIndex}-`
const cmd = cmdRoot + ` -o ${this.imgDir}/${imgPrefix}%d.png ${pdfPath}`
const muStart = this.nowInMillis()
exec(cmd, (err, stdout, stderr) => {
this.emit('done.mupdf.convert', { output: cmd, time: this.elapsed(muStart), error: err })
if (err) {
errors.push(err)
}
resolve()
})
}
})
})
return this.processImgConversionTasks(tasks, errors, start)
}
_createPowerpoint (outputDir, sortedImages, callback) {
var pptxOutput = path.resolve(outputDir, `output_${process.hrtime()[1]}.pptx`)
this._aggregateSlides(sortedImages, pptxOutput, this.imgDir, callback)
}
_getConvertOptions (options) {

@@ -191,4 +395,4 @@ const co = options.convertOptions || {}

* @returns {Promise} when all conversions have been done, and events emitted
* Any errors will be logged, but a rejection will not occur. It is better to get
* an output with a line than no output at all.
* Any errors will be logged, but a rejection will not occur. It is better
* to get an output with a line than no output at all.
*

@@ -199,3 +403,3 @@ * @private

if (!this.options.cropLastImage) {
return new Promise(resolve => resolve())
return Promise.resolve()
}

@@ -243,3 +447,4 @@

* @param sortedImages sorted by file and page/image number
* @returns {array} of files from sortedImages that are the last image for each file
* @returns {Array} of files from sortedImages that are the last image for
* each file
* @private

@@ -250,3 +455,3 @@ */

return sortedImages.reduce((acc, val, i, arr) => {
const fileAndPage = /.*img-(\d*)-(\d*).*/.exec(val)
// `[pdf|img]` would be a character class (any ONE of p, d, f, |, i, m, g);
// a non-capturing alternation matches the real `pdf-`/`img-` prefixes and
// keeps the file/page capture groups at indexes 1 and 2.
const fileAndPage = /.*(?:pdf|img)-(\d*)-(\d*).*/.exec(val)
const file = fileAndPage[1]

@@ -271,3 +476,3 @@

*
* @param {array} images
* @param {Array} images
* @param pptxOutput pptx file path

@@ -309,14 +514,17 @@ * @param imgDir the directory where png files are generated

_sortImages (imageFiles) {
_sortPages (imageFiles) {
// Example: /var/folders/dr/f1q4znd96xv8wp82y4cfgg700000gn/T/pdf_ppt_5tz0dw/img/img-5-10.png
const rex = /.*img-(\d*)-(\d*).*/
// File = 5, Page = 10
const rex = /.*(img|pdf)-(\d*)-(\d*).*/
return imageFiles.sort((a, b) => {
let aGrps = rex.exec(a)
let bGrps = rex.exec(b)
// PDF Sequence + Page Sequence Comparison
let pageComp = aGrps[1] - bGrps[1]
if (pageComp === 0) {
return aGrps[2] - bGrps[2]
// PDF File Sequence + Page Sequence Comparison
const fileGrp = 2
const pageGrp = 3
let fileComp = aGrps[fileGrp] - bGrps[fileGrp]
if (fileComp === 0) {
return aGrps[pageGrp] - bGrps[pageGrp]
}
return pageComp
return fileComp
})

@@ -364,3 +572,6 @@ }

if (err) reject(err)
resolve(folder)
fs.mkdir(path.resolve(folder, 'pdf'), (err) => {
if (err) reject(err)
resolve(folder)
})
})

@@ -367,0 +578,0 @@ }

2

package.json
{
"name": "pdf-powerpoint",
"version": "1.1.3",
"version": "1.2.0",
"description": "Converts one or more PDFs into a powerpoint with one pdf page per slide",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -6,34 +6,64 @@ [![Build Status](https://travis-ci.org/SpiderStrategies/pdf-powerpoint.svg?branch=master)](https://travis-ci.org/SpiderStrategies/pdf-powerpoint)

## PDF to Powerpoint Converter
# PDF to Powerpoint Converter
An NPM module that accepts one or more PDF files and converts them into Powerpoint slides.
- GhostScript is used to transform each page of a PDF into a PNG image.
### General workflow
- A rendering engine is used to transform each page of a PDF into a PNG image.
- Each image is added to its own slide in the powerpoint presentation.
- Slides are in the order of the PDFs passed in the array
### PDF Rendering engines
Based on the requirements of your application, one rendering engine may be more appropriate
than another. This library currently supports three options. In all cases, you must ensure
the binaries are installed for your runtime; they are not packaged with this module.
It is recommended you weigh the runtime performance and output quality of each engine for
the content you are converting.
1. GhostScript - Converts a PDF into PNGs with one command per PDF
- Debian: `apt-get install -y ghostscript`
- OSX: `brew install ghostscript`
1. MuPDF - Converts a PDF into PNGs with one command per PDF
- Debian: `apt-get install -y mupdf-tools`
- OSX: `brew install mupdf-tools`
1. Inkscape - Separates PDFs into single page PDFs and then converts each PDF into PNG
- Debian: `apt-get install -y inkscape`
- OSX: `brew install inkscape`
**Supported Runtimes:** Node > 5.10.0
**Required packages:**
- Debian: `apt-get install -y ghostscript`
- OSX: `brew install ghostscript`
## Usage
### Usage
```javascript
import {Powerpoint} from 'pdf-powerpoint'
const p = new Powerpoint()
const p = new Powerpoint([options])
```
#### Constructor Options
* `clean` - set to false if intermediate image files should be left on the filesystem.
* `jobId` - if provided, this will be included in any logging output
* `cropLastImage` requires ImageMagick `convert` to be on the path. Will crop the last pdf image before placing on slide, sometimes a line would show up if the last PDF page was a partial page.
* `dimensions`
- width - of slides in pixels (default: 800)
- height - of slides in pixels (default: 600)
- type - options: '35mm' 'A3' 'A4', 'B4ISO' 'B4JIS' 'B5ISO' 'B5JIS' 'banner' 'custom' 'hagakiCard' 'ledger', 'letter' 'overhead' 'screen16x10' 'screen16x9' 'screen4x3' (default)
```javascript
p.convertPDFToPowerpoint('input.pdf', [options,] (err, result) => {
//Do something with the result (filepath to output)
})
```
### Options
#### Convert Options
* `stagingDir` - This is where the pptx file will be generated.
- `engine`
- 'ghostscript' (default)
- 'mupdf'
- 'inkscape'
- `stagingDir` - This is where the pptx file will be generated.
- Images go in `stagingDir/img` and are automatically removed once the powerpoint file is generated.
- If not provided the default is to use `fs.mkdtemp('${os.tmpdir()}/pdf_ppt_')` to generate a random temp directory
* `convertOptions` - These are used for Ghostscript conversion
- `convertOptions` - These are used for Ghostscript conversion
- `density` - specifies the PPI setting for the output image

@@ -50,15 +80,29 @@ - default: 72, maximum value allowed is 300

Events emit an object that may have the following properties:
* `error` - if an error occurred
* `time` - if the event marks the end of a corresponding start event
* `output` - If there is a PNG or PPTX file generated from the event
- `error` - if an error occurred
- `time` - if the event marks the end of a corresponding start event
- `output` - If there is a PNG or PPTX file generated from the event
#### Event Names
1. `done.gs.convert`- `output` is the GhostScript command that was executed
1. `err.png.all`
1. `done.png.all` - `output` is an array of paths to images generated from PDF
1. `done.png.clean` - `output` is the image directory that was deleted
1. `done.pptx.creation` - powerpoint is complete in memory, all images added to slides
1. `done.pptx.saved` - `output` is the pptFile
- `err.png.all`
- `done.png.all` - `output` is an array of paths to images generated from PDF
- `done.png.clean` - `output` is the image directory that was deleted
- `done.pptx.creation` - powerpoint is complete in memory, all images added to slides
- `done.pptx.saved` - `output` is the pptFile
##### Inkscape Engine
- `done.pdf.separate` - `output` is the command executed
- `done.inkscape.export` - after each inkscape conversion completes, `output` is the command that was executed
- `done.inkscape.export.all` - after all inkscape conversions are complete
##### GhostScript Engine
- `done.gs.convert` - `output` is the GhostScript command that was executed
- Only when `cropLastImage` option is set
- `done.im.convert` - after the last image of each PDF is converted
- `done.im.convert.all` - after all images are cropped
- `err.im.convert` - if any of the image cropping operations fails
##### MuPDF Engine
- `done.mupdf.convert` - `output` is the MuPDF (mudraw) command that was executed
### Logging

@@ -73,18 +117,14 @@

### Implementation
## Developer Guide
#### ImageMagick delegates to GhostScript for PDF -> PNG conversion
This library originally used ImageMagick but it was discovered that ImageMagick delegates to GhostScript for PDF -> PNG conversion, so GhostScript is used directly
- ImageMagick: `convert -density 72 -quality 100 -verbose '/var/folders/dr/f1q4znd96xv8wp82y4cfgg700000gn/T/833198680xmyTzU/output.pdf[4]' '/var/folders/dr/f1q4znd96xv8wp82y4cfgg700000gn/T/pdf_ppt_Tl9eSm/img/output-4.png'`
- GhostScript: `'gs' -q -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dAlignToPixels=0 -dGridFitTT=2 '-sDEVICE=pngalpha' -dTextAlphaBits=4 -dGraphicsAlphaBits=4 '-r72x72' -dFirstPage=5 -dLastPage=5 '-sOutputFile=/var/tmp/magick-94224ozuZS3iFphAj%d' '-f/var/tmp/magick-94224zWXBFMw8ZiEA' '-f/var/tmp/magick-9422413LS3T1dhoL4'`
- ImageMagick: `convert -density 72 -quality 100 -verbose '/tmp/output.pdf[4]' '/tmp/img/output-4.png'`
- Results in (GhostScript): `'gs' -q -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dAlignToPixels=0 -dGridFitTT=2 '-sDEVICE=pngalpha' -dTextAlphaBits=4 -dGraphicsAlphaBits=4 '-r72x72' -dFirstPage=5 -dLastPage=5 '-sOutputFile=/tmp/magick-94224ozuZS3iFphAj%d' '-f/tmp/magick-94224zWXBFMw8ZiEA' '-f/tmp/magick-9422413LS3T1dhoL4'`
#### So GhostScript is used directly
*Note:* You must ensure that GhostScript is installed on your system, it is not included with this package.
If you have GhostScript installed globally on your system it should be located automatically, but if not you can provide the path to the GhostScript executable by setting the `PDF_PPT_GSPATH` environment variable.
[GhostScript Option Documentation](https://ghostscript.com/doc/current/Use.htm)
The following command is generated: `gs -q -dQUIET -sDEVICE=pngalpha -r150 -o outputFile-%d.png`
This module uses the following command: `gs -q -dQUIET -sDEVICE=pngalpha -r150 -o outputFile-%d.png`
#### GhostScript Tips (not this module's API)
- As a convenient shorthand you can use the `-o option` followed by the output file specification as discussed above. The -o option also sets the `-dBATCH` and `-dNOPAUSE` options.

@@ -91,0 +131,0 @@ - `-q` Quiet startup: suppress normal startup messages, and also do the equivalent of -dQUIET.

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc