crawler-request
Advanced tools
Comparing version 1.1.7 to 1.1.8
@@ -22,3 +22,3 @@ const Axios = require('axios'); | ||
const MIME_REGEX = /.*\.(jpg|png|gif|dotx|webp|flif|cr2|tif|bmp|jxr|psd|rar|zip|tar|rar|js|gz|bz2|7z|dmg|mp4|m4v|mid|mkv|webm|mov|avi|wmv|mpg|mp3|m4a|ogg|opus|flac|wav|amr|epub|exe|swf|rtf|woff|woff2|eot|ttf|otf|ico|flv|ps|xz|sqlite|nes|crx|xpi|cab|dep|ar|rpm|z|lz|msi|mxf|mts|wasm|blend|bpg|docx|pptx|xlsx|3gp|css|xlam|xla|xls|xps|exe)$/i; | ||
const MIME_REGEX = /.*\.(jpg|png|gif|dotx|doc|webp|flif|cr2|tif|bmp|jxr|psd|rar|zip|tar|rar|js|gz|bz2|7z|dmg|mp4|m4v|mid|mkv|webm|mov|avi|wmv|mpg|mp3|m4a|ogg|opus|flac|wav|amr|epub|exe|swf|rtf|woff|woff2|eot|ttf|otf|ico|flv|ps|xz|sqlite|nes|crx|xpi|cab|dep|ar|rpm|z|lz|msi|mxf|mts|wasm|blend|bpg|docx|pptx|xlsx|3gp|css|xlam|xla|xls|xps|exe)$/i; | ||
@@ -96,4 +96,8 @@ function _crawler_request(current_url) { | ||
let pdfParser = new Pdf2Json(this, 1); | ||
pdfParser.on("pdfParser_dataError", errData => { | ||
throw errData | ||
pdfParser.on("pdfParser_dataError", err => { | ||
ret.status = -222; | ||
ret.error = err.parserError ? err.parserError : "pdf parser error."; | ||
resolve(null); | ||
//return ret; | ||
//throw errData | ||
}); | ||
@@ -104,3 +108,6 @@ pdfParser.on("pdfParser_dataReady", pdfData => resolve(pdfParser.getRawTextContent())); | ||
.then(res => { | ||
ret.text = res.replace(/Page[\(\)\s0-9]+Break/ig, ''); | ||
if (res) { | ||
ret.text = res.replace(/Page[\(\)\s0-9]+Break/ig, ''); | ||
} | ||
ret.type = "pdf"; | ||
@@ -110,3 +117,6 @@ return ret; | ||
.catch(err => { | ||
throw err | ||
ret.status = -222; | ||
ret.error = err.toString(); | ||
return ret; | ||
//throw err | ||
}); | ||
@@ -132,3 +142,3 @@ } else { | ||
let current_status = -1; | ||
let current_status = -100; | ||
@@ -141,3 +151,3 @@ if (MIME_REGEX.test(current_url)) { | ||
text: null, | ||
status: -1, | ||
status: -100, | ||
error: "unsupported-extension" | ||
@@ -153,3 +163,3 @@ }); | ||
.then(function (res) { | ||
res.status = current_status; | ||
res.status = res.status == -222 ? -222 : current_status; | ||
return res; | ||
@@ -163,3 +173,3 @@ }) | ||
text: null, //err.response.status | ||
status: err.response && err.response.status ? err.response.status : -1, | ||
status: err.response && err.response.status ? err.response.status : -111, | ||
error: err.toString() | ||
@@ -285,2 +295,9 @@ }; | ||
//let result_11 = yield crawler_request_wrapper("https://www.nanomagnetics-inst.com/usrfiles/files/Articles/RT-SHPM/RT-SHPM-1.pdf"); | ||
//debugger; | ||
//process.exit(); | ||
@@ -287,0 +304,0 @@ |
{ | ||
"name": "crawler-request", | ||
"version": "1.1.7", | ||
"version": "1.1.8", | ||
"description": "Http requests module customized for crawlers.", | ||
@@ -5,0 +5,0 @@ "main": "crawler-request.js", |
@@ -11,3 +11,3 @@ const assert = require('assert'); | ||
assert.equal(res.type, "none"); | ||
assert.equal(res.status, -1); | ||
assert.equal(res.status, -111); | ||
assert.notEqual(res.error, null); | ||
@@ -22,3 +22,3 @@ }); | ||
assert.equal(res.type, "none"); | ||
assert.equal(res.status, -1); | ||
assert.equal(res.status, -100); | ||
assert.notEqual(res.error, null); | ||
@@ -25,0 +25,0 @@ assert.equal(res.error, "unsupported-extension"); |
24448
467