Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

crawler

Package Overview
Dependencies
Maintainers
0
Versions
40
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler - npm Package Compare versions

Comparing version 2.0.1 to 2.0.2

4

dist/crawler.d.ts

@@ -9,3 +9,2 @@ /// <reference types="node" />

options: CrawlerOptions;
globalOnlyOptions: string[];
seen: any;

@@ -19,3 +18,2 @@ constructor(options?: CrawlerOptions);

/**
*
* @param rateLimiterId

@@ -34,3 +32,2 @@ * @param property

/**
*
* @param options

@@ -59,3 +56,2 @@ * @returns if there is a "callback" function in the options, return the result of the callback function. \

/**
*
* @param options

@@ -62,0 +58,0 @@ * @description Add a request to the queue.

58

dist/crawler.js

@@ -12,5 +12,4 @@ import { EventEmitter } from "events";

// @todo: remove seenreq dependency
process.env.NODE_ENV = process.env.NODE_ENV ?? process.argv[2];
// process.env.NODE_ENV = "debug";
logOptions.minLevel = process.env.NODE_ENV === "debug" ? 0 : 3;
// process.env.NODE_ENV = process.env.NODE_ENV ?? process.argv[2];
logOptions.minLevel = process.env.NODE_ENV === "debug" ? 0 : process.env.NODE_ENV === "test" ? 7 : 3;
const log = new Logger(logOptions);

@@ -30,3 +29,5 @@ class Crawler extends EventEmitter {

this.emit("schedule", options);
this._limiters.getRateLimiter(options.rateLimiterId).submit(options.priority, (done, rateLimiterId) => {
this._limiters
.getRateLimiter(options.rateLimiterId)
.submit(options.priority, (done, rateLimiterId) => {
options.release = () => {

@@ -44,12 +45,14 @@ done();

}
else if (typeof options.uri === "function") {
options.uri((uri) => {
options.url = uri;
this._execute(options);
});
}
else {
options.url = options.url ?? options.uri;
delete options.uri;
this._execute(options);
if (typeof options.url === "function") {
options.url((url) => {
options.url = url;
this._execute(options);
});
}
else {
delete options.uri;
this._execute(options);
}
}

@@ -116,3 +119,3 @@ });

if (options.retries && options.retries > 0) {
log.warn(`${error} when fetching ${options.url} ${options.retries ? `(${options.retries} retries left)` : ""}`);
log.warn(`${error} occurred on ${options.url}. ${options.retries ? `(${options.retries} retries left)` : ""}`);
setTimeout(() => {

@@ -125,7 +128,9 @@ options.retries--;

else {
log.error(`${error} when fetching ${options.url}. Request failed.`);
log.error(`${error} occurred on ${options.url}. Request failed.`);
if (options.callback && typeof options.callback === "function") {
return options.callback(error, { options }, options.release);
}
throw error;
else {
throw error;
}
}

@@ -163,3 +168,3 @@ }

}
if (options.jQuery === true) {
if (options.jQuery === true && !options.isJson) {
if (response.body === "" || !this._detectHtmlOnHeaders(response.headers)) {

@@ -183,3 +188,2 @@ log.warn("response body is not HTML, skip injecting. Set jQuery to false to mute this warning.");

/**
*
* @param options

@@ -204,5 +208,2 @@ * @returns if there is a "callback" function in the options, return the result of the callback function. \

setDefaults(options, this.options);
this.globalOnlyOptions.forEach(globalOnlyOption => {
delete options[globalOnlyOption];
});
options.skipEventRequest = isBoolean(options.skipEventRequest) ? options.skipEventRequest : true;

@@ -221,3 +222,2 @@ delete options.preRequest;

/**
*
* @param options

@@ -247,5 +247,2 @@ * @description Add a request to the queue.

options.headers = { ...this.options.headers, ...options.headers };
this.globalOnlyOptions.forEach(globalOnlyOption => {
delete options[globalOnlyOption];
});
if (!this.options.skipDuplicates) {

@@ -287,2 +284,3 @@ this._schedule(options);

isJson: false,
silence: false,
};

@@ -293,10 +291,5 @@ this.options = { ...defaultOptions, ...options };

}
this.globalOnlyOptions = [
"maxConnections",
"rateLimit",
"priorityLevels",
"skipDuplicates",
"homogeneous",
"userAgents",
];
if (this.options.silence) {
log.settings.minLevel = 7;
}
this._limiters = new Cluster({

@@ -328,3 +321,2 @@ maxConnections: this.options.maxConnections,

/**
*
* @param rateLimiterId

@@ -331,0 +323,0 @@ * @param property

import { RequestConfig, RequestOptions } from "./types/crawler.js";
export declare const globalOnlyOptions: string[];
export declare const crawlerOnlyOptions: string[];
export declare const deprecatedOptions: string[];
export declare const getCharset: (headers: Record<string, unknown>) => null | string;

@@ -3,0 +6,0 @@ export declare const getValidOptions: (options: RequestConfig) => RequestOptions;

import { HttpProxyAgent, HttpsProxyAgent } from "hpagent";
import http2Wrapper from "http2-wrapper";
import { cleanObject, getType, isValidUrl } from "./lib/utils.js";
export const globalOnlyOptions = [
"maxConnections",
"priorityLevels",
"rateLimit",
"skipDuplicates",
"homogeneous",
"userAgents",
"silence",
];
export const crawlerOnlyOptions = [
"rateLimiterId",
"forceUTF8",
"jQuery",
"retryInterval",
"priority",
"proxy",
"retries",
"preRequest",
"callback",
"release",
"isJson",
"referer",
"rejectUnauthorized",
"userParams",
].concat(globalOnlyOptions);
export const deprecatedOptions = [
"uri",
"qs",
"strictSSL",
"incomingEncoding",
"gzip",
"jar",
"jsonReviver",
"jsonReplacer",
"skipEventRequest",
];
export const getCharset = (headers) => {

@@ -36,21 +72,2 @@ let charset = null;

export const alignOptions = (options) => {
const crawlerOnlyOptions = [
"rateLimiterId",
"forceUTF8",
"incomingEncoding",
"jQuery",
"retryInterval",
"priority",
"proxy",
"retries",
"preRequest",
"callback",
"release",
"userAgents",
"isJson",
"referer",
"rejectUnauthorized",
"userParams",
];
const deprecatedOptions = ["uri", "qs", "strictSSL", "gzip", "jar", "jsonReviver", "jsonReplacer", "skipEventRequest"].concat(crawlerOnlyOptions);
const gotOptions = {

@@ -100,6 +117,6 @@ ...options,

options.encoding = options.incomingEncoding;
delete options["incomingEncoding"];
gotOptions.responseType = "buffer";
Object.keys(gotOptions).forEach(key => {
if (deprecatedOptions.includes(key)) {
const invalidOptions = crawlerOnlyOptions.concat(deprecatedOptions);
invalidOptions.forEach(key => {
if (key in gotOptions) {
delete gotOptions[key];

@@ -106,0 +123,0 @@ }

@@ -42,2 +42,8 @@ export type GlobalOnlyOptions = {

userAgents?: string | string[];
/**
* Global Only option.
* @default false
* @description If true, the crawler will mute all warning and error messages. The request error will be still thrown.
*/
silence?: boolean;
};

@@ -44,0 +50,0 @@ export type RequestOptions = {

{
"name": "crawler",
"version": "2.0.1",
"version": "2.0.2",
"description": "Crawler is a ready-to-use web spider that works with proxies, asynchrony, rate limit, configurable request pools, jQuery, and HTTP/2 support.",

@@ -12,4 +12,4 @@ "repository": {

"build": "tsc",
"test": "ava",
"cover": "c8 ava"
"test": "NODE_ENV=test ava",
"cover": "NODE_ENV=test c8 ava"
},

@@ -45,3 +45,2 @@ "engines": {

"seenreq": "^3.0.0",
"sinon": "^18.0.0",
"tslog": "^4.9.3"

@@ -52,3 +51,3 @@ },

"@types/got": "^9.6.12",
"@types/node": "^20.14.7",
"@types/node": "^20.14.8",
"ava": "^6.1.3",

@@ -59,6 +58,7 @@ "c8": "^10.1.2",

"nock": "^13.5.4",
"sinon": "^18.0.0",
"tough-cookie": "^4.1.4",
"tsx": "^4.15.7",
"typescript": "^5.5.2",
"typescript-eslint": "8.0.0-alpha.27"
"typescript": "^5.4.5",
"typescript-eslint": "8.0.0-alpha.30"
},

@@ -74,3 +74,2 @@ "ava": {

},
"failFast": true,
"verbose": true

@@ -77,0 +76,0 @@ },

@@ -431,2 +431,7 @@ <p align="center">

#### `silence`
- **Type:** `boolean`
- **Default** : false
- If true, the crawler will mute all warning and error messages. The request error will be still reported.
#### `maxConnections`

@@ -433,0 +438,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc