@nosferatu500/textract
Advanced tools
Comparing version 3.0.1 to 3.0.2
@@ -42,3 +42,4 @@ var { spawn } = require( 'child_process' ) | ||
exec( 'textutil ' + __filename, | ||
exec( | ||
'textutil ' + __filename, | ||
function( error /* , stdout, stderr */ ) { | ||
@@ -51,3 +52,4 @@ var msg; | ||
cb( error === null, msg ); | ||
}); | ||
} | ||
); | ||
} | ||
@@ -54,0 +56,0 @@ |
@@ -10,3 +10,4 @@ var { exec } = require( 'child_process' ) | ||
exec( 'antiword -m UTF-8.txt "' + filePath + '"', | ||
exec( | ||
'antiword -m UTF-8.txt "' + filePath + '"', | ||
execOptions, | ||
@@ -27,3 +28,4 @@ function( error, stdout /* , stderr */ ) { | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -42,3 +44,4 @@ | ||
exec( 'antiword -m UTF-8.txt ' + __filename, | ||
exec( | ||
'antiword -m UTF-8.txt ' + __filename, | ||
execOptions, | ||
@@ -55,3 +58,4 @@ function( error /* , stdout, stderr */ ) { | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -58,0 +62,0 @@ |
var xpath = require( 'xpath' ) | ||
, Dom = require( 'xmldom' ).DOMParser | ||
, Dom = require( '@xmldom/xmldom' ).DOMParser | ||
, yauzl = require( 'yauzl' ) | ||
@@ -16,5 +16,3 @@ , util = require( '../util' ) | ||
paragraph = new Dom().parseFromString( paragraph.toString() ); | ||
ts = xpath.select( | ||
"//*[local-name()='t' or local-name()='tab' or local-name()='br']", paragraph | ||
); | ||
ts = xpath.select( "//*[local-name()='t' or local-name()='tab' or local-name()='br']", paragraph ); | ||
ts.forEach( function( t ) { | ||
@@ -57,7 +55,9 @@ if ( t.localName === 't' && t.childNodes.length > 0 ) { | ||
} else { | ||
cb( new Error( | ||
'Extraction could not find content in file, are you' | ||
cb( | ||
new Error( | ||
'Extraction could not find content in file, are you' | ||
+ ' sure it is the mime type it says it is?' | ||
), | ||
null ); | ||
), | ||
null | ||
); | ||
} | ||
@@ -64,0 +64,0 @@ } |
@@ -8,3 +8,4 @@ var { exec } = require( 'child_process' ) | ||
, escapedPath = filePath.replace( /\s/g, '\\ ' ); | ||
exec( 'drawingtotext ' + escapedPath, | ||
exec( | ||
'drawingtotext ' + escapedPath, | ||
execOptions, | ||
@@ -20,7 +21,9 @@ function( error, stdout, stderr ) { | ||
cb( null, stdout ); | ||
}); | ||
} | ||
); | ||
} | ||
function testForBinary( options, cb ) { | ||
exec( 'drawingtotext notalegalfile', | ||
exec( | ||
'drawingtotext notalegalfile', | ||
function( error, stdout, stderr ) { | ||
@@ -36,3 +39,4 @@ var msg | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -39,0 +43,0 @@ |
@@ -19,8 +19,15 @@ var { exec } = require( 'child_process' ) | ||
var execOptions = util.createExecOptions( 'images', options ); | ||
util.runExecIntoFile( 'tesseract', filePath, options, | ||
execOptions, tesseractExtractionCommand, cb ); | ||
util.runExecIntoFile( | ||
'tesseract', | ||
filePath, | ||
options, | ||
execOptions, | ||
tesseractExtractionCommand, | ||
cb | ||
); | ||
} | ||
function testForBinary( options, cb ) { | ||
exec( 'tesseract', | ||
exec( | ||
'tesseract', | ||
function( error, stdout, stderr ) { | ||
@@ -38,3 +45,4 @@ var msg; | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -41,0 +49,0 @@ |
@@ -24,3 +24,4 @@ var path = require( 'path' ) | ||
function testForBinary( options, cb ) { | ||
exec( 'pdftotext -v', | ||
exec( | ||
'pdftotext -v', | ||
function( error, stdout, stderr ) { | ||
@@ -35,3 +36,4 @@ var msg; | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -38,0 +40,0 @@ |
var xpath = require( 'xpath' ) | ||
, Dom = require( 'xmldom' ).DOMParser | ||
, Dom = require( '@xmldom/xmldom' ).DOMParser | ||
, yauzl = require( 'yauzl' ) | ||
@@ -26,4 +26,6 @@ , util = require( '../util' ) | ||
paragraph = new Dom().parseFromString( paragraph.toString() ); | ||
ts = xpath.select( "//*[local-name()='t' or local-name()='tab' or local-name()='br']", | ||
paragraph ); | ||
ts = xpath.select( | ||
"//*[local-name()='t' or local-name()='tab' or local-name()='br']", | ||
paragraph | ||
); | ||
ts.forEach( function( t ) { | ||
@@ -30,0 +32,0 @@ if ( t.localName === 't' && t.childNodes.length > 0 ) { |
@@ -24,3 +24,4 @@ var { exec } = require( 'child_process' ) | ||
// unrtf --quiet option doesn't work. | ||
exec( 'unrtf --html --nopict ' + escapedPath, | ||
exec( | ||
'unrtf --html --nopict ' + escapedPath, | ||
execOptions, | ||
@@ -36,3 +37,4 @@ function( error, stdout /* , stderr */ ) { | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -47,3 +49,4 @@ | ||
exec( 'unrtf ' + __filename, | ||
exec( | ||
'unrtf ' + __filename, | ||
function( error /* , stdout, stderr */ ) { | ||
@@ -59,3 +62,4 @@ var msg; | ||
} | ||
}); | ||
} | ||
); | ||
} | ||
@@ -62,0 +66,0 @@ |
@@ -49,3 +49,4 @@ var { exec } = require( 'child_process' ) | ||
function unzipCheck( type, cb ) { | ||
exec( 'unzip', | ||
exec( | ||
'unzip', | ||
function( error /* , stdout, stderr */ ) { | ||
@@ -58,3 +59,4 @@ if ( error ) { | ||
cb( error === null ); | ||
}); | ||
} | ||
); | ||
} | ||
@@ -112,3 +114,5 @@ | ||
, cmd = genCommand( options, escapedFilePath, escapedFileTempOutPath ); | ||
exec( cmd, execOptions, | ||
exec( | ||
cmd, | ||
execOptions, | ||
function( error /* , stdout, stderr */ ) { | ||
@@ -148,3 +152,4 @@ if ( error !== null ) { | ||
}); | ||
}); | ||
} | ||
); | ||
} | ||
@@ -151,0 +156,0 @@ |
{ | ||
"name": "@nosferatu500/textract", | ||
"version": "3.0.1", | ||
"version": "3.0.2", | ||
"homepage": "https://github.com/nosferatu500/textract", | ||
@@ -53,10 +53,5 @@ "description": "Extracting text from files of various type including html, pdf, doc, docx, xls, xlsx, csv, pptx, png, jpg, gif, rtf, text/*, and various open office.", | ||
"dependencies": { | ||
"mime": "2.5.2", | ||
"pdf-text-extract": "1.5.0", | ||
"xpath": "0.0.32", | ||
"xmldom": "^0.6.0", | ||
"xlsx": "^0.17.0", | ||
"@xmldom/xmldom": "^0.7.5", | ||
"cheerio": "1.0.0-rc.10", | ||
"marked": "2.1.3", | ||
"meow": "^9.0.0", | ||
"epub2": "1.3.8", | ||
"got": "10.7.0", | ||
@@ -66,13 +61,18 @@ "html-entities": "2.3.2", | ||
"jschardet": "3.0.0", | ||
"yauzl": "2.10.0", | ||
"epub2": "1.3.8" | ||
"marked": "2.1.3", | ||
"meow": "^9.0.0", | ||
"mime": "3.0.0", | ||
"pdf-text-extract": "1.5.0", | ||
"xlsx": "^0.17.4", | ||
"xpath": "0.0.32", | ||
"yauzl": "2.10.0" | ||
}, | ||
"devDependencies": { | ||
"chai": "4.3.4", | ||
"eslint": "^7.32.0", | ||
"eslint-config-airbnb": "^18.2.1", | ||
"eslint-plugin-import": "^2.23.4", | ||
"eslint-plugin-jsx-a11y": "^6.4.1", | ||
"eslint-plugin-react": "^7.24.0", | ||
"mocha": "^9.0.3" | ||
"eslint": "^8.3.0", | ||
"eslint-config-airbnb": "^19.0.0", | ||
"eslint-plugin-import": "^2.25.3", | ||
"eslint-plugin-jsx-a11y": "^6.5.1", | ||
"eslint-plugin-react": "^7.27.1", | ||
"mocha": "^9.1.3" | ||
}, | ||
@@ -88,3 +88,3 @@ "scripts": { | ||
"engines": { | ||
"node": ">=8.10" | ||
"node": ">=14" | ||
}, | ||
@@ -95,2 +95,2 @@ "bin": { | ||
"main": "./lib/index" | ||
} | ||
} |
68213
1300
+ Added @xmldom/xmldom@^0.7.5
+ Added @xmldom/xmldom@0.7.13 (transitive)
+ Added mime@3.0.0 (transitive)
- Removed xmldom@^0.6.0
- Removed mime@2.5.2 (transitive)
- Removed xmldom@0.6.0 (transitive)
Updated mime@3.0.0
Updated xlsx@^0.17.4