Socket
Socket
Sign in | Demo | Install

crawler-request

Package Overview
Dependencies
6
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.1.7 to 1.1.8

35

crawler-request.js

@@ -22,3 +22,3 @@ const Axios = require('axios');

const MIME_REGEX = /.*\.(jpg|png|gif|dotx|webp|flif|cr2|tif|bmp|jxr|psd|rar|zip|tar|rar|js|gz|bz2|7z|dmg|mp4|m4v|mid|mkv|webm|mov|avi|wmv|mpg|mp3|m4a|ogg|opus|flac|wav|amr|epub|exe|swf|rtf|woff|woff2|eot|ttf|otf|ico|flv|ps|xz|sqlite|nes|crx|xpi|cab|dep|ar|rpm|z|lz|msi|mxf|mts|wasm|blend|bpg|docx|pptx|xlsx|3gp|css|xlam|xla|xls|xps|exe)$/i;
const MIME_REGEX = /.*\.(jpg|png|gif|dotx|doc|webp|flif|cr2|tif|bmp|jxr|psd|rar|zip|tar|rar|js|gz|bz2|7z|dmg|mp4|m4v|mid|mkv|webm|mov|avi|wmv|mpg|mp3|m4a|ogg|opus|flac|wav|amr|epub|exe|swf|rtf|woff|woff2|eot|ttf|otf|ico|flv|ps|xz|sqlite|nes|crx|xpi|cab|dep|ar|rpm|z|lz|msi|mxf|mts|wasm|blend|bpg|docx|pptx|xlsx|3gp|css|xlam|xla|xls|xps|exe)$/i;

@@ -96,4 +96,8 @@ function _crawler_request(current_url) {

let pdfParser = new Pdf2Json(this, 1);
pdfParser.on("pdfParser_dataError", errData => {
throw errData
pdfParser.on("pdfParser_dataError", err => {
ret.status = -222;
ret.error = err.parserError ? err.parserError : "pdf parser error.";
resolve(null);
//return ret;
//throw errData
});

@@ -104,3 +108,6 @@ pdfParser.on("pdfParser_dataReady", pdfData => resolve(pdfParser.getRawTextContent()));

.then(res => {
ret.text = res.replace(/Page[\(\)\s0-9]+Break/ig, '');
if (res) {
ret.text = res.replace(/Page[\(\)\s0-9]+Break/ig, '');
}
ret.type = "pdf";

@@ -110,3 +117,6 @@ return ret;

.catch(err => {
throw err
ret.status = -222;
ret.error = err.toString();
return ret;
//throw err
});

@@ -132,3 +142,3 @@ } else {

let current_status = -1;
let current_status = -100;

@@ -141,3 +151,3 @@ if (MIME_REGEX.test(current_url)) {

text: null,
status: -1,
status: -100,
error: "unsupported-extension"

@@ -153,3 +163,3 @@ });

.then(function (res) {
res.status = current_status;
res.status = res.status == -222 ? -222 : current_status;
return res;

@@ -163,3 +173,3 @@ })

text: null, //err.response.status
status: err.response && err.response.status ? err.response.status : -1,
status: err.response && err.response.status ? err.response.status : -111,
error: err.toString()

@@ -285,2 +295,9 @@ };

//let result_11 = yield crawler_request_wrapper("https://www.nanomagnetics-inst.com/usrfiles/files/Articles/RT-SHPM/RT-SHPM-1.pdf");
//debugger;
//process.exit();

@@ -287,0 +304,0 @@

{
"name": "crawler-request",
"version": "1.1.7",
"version": "1.1.8",
"description": "Http requests module customized for crawlers.",

@@ -5,0 +5,0 @@ "main": "crawler-request.js",

@@ -11,3 +11,3 @@ const assert = require('assert');

assert.equal(res.type, "none");
assert.equal(res.status, -1);
assert.equal(res.status, -111);
assert.notEqual(res.error, null);

@@ -22,3 +22,3 @@ });

assert.equal(res.type, "none");
assert.equal(res.status, -1);
assert.equal(res.status, -100);
assert.notEqual(res.error, null);

@@ -25,0 +25,0 @@ assert.equal(res.error, "unsupported-extension");

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc