Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

crawler

Package Overview
Dependencies
Maintainers
4
Versions
40
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler - npm Package Compare versions

Comparing version 2.0.0-beta.5 to 2.0.0

crawler_primary.png

14

dist/crawler.js

@@ -154,3 +154,3 @@ import { EventEmitter } from "events";

}
catch (err) {
catch (_err) {
log.warn("JSON parsing failed, body is not JSON. Set isJson to false to mute this warning.");

@@ -167,3 +167,3 @@ }

}
catch (err) {
catch (_err) {
log.warn("HTML detected failed. Set jQuery to false to mute this warning.");

@@ -244,6 +244,3 @@ }

if (!this.options.skipDuplicates) {
try {
this._schedule(options);
}
catch (err) { }
this._schedule(options);
return;

@@ -255,6 +252,3 @@ }

if (!rst) {
try {
this._schedule(options);
}
catch (err) { }
this._schedule(options);
}

@@ -261,0 +255,0 @@ })

@@ -24,3 +24,3 @@ import Queue from "./queue.js";

priority = this._elements.length - 1;
console.error(`Invalid priority: ${priority} must be between 0 and ${this._elements.length - 1}`);
throw new RangeError(`Invalid priority: ${priority} must be between 0 and ${this._elements.length - 1}`);
}

@@ -37,4 +37,3 @@ this._elements[priority].enqueue(value);

}
console.error("multiPriorityQueue is empty");
return undefined;
throw new ReferenceError("multiPriorityQueue is empty");
}

@@ -41,0 +40,0 @@ }

@@ -18,6 +18,14 @@ /**

export declare const isValidUrl: (url: string) => boolean;
export declare function flattenDeep<T>(array: T[]): T[];
export declare function pick<T extends Object, K extends keyof T>(target: T, keys: (keyof T)[]): Pick<T, K>;
/**
*
* @param array
* @returns a flattened array
* @description
* Flattens an array of arrays recursively.
*
*/
export declare function flattenDeep(array: any[]): any[];
export declare function pick<T extends object, K extends keyof T>(target: T, keys: (keyof T)[]): Pick<T, K>;
/**
*
* @param obj

@@ -24,0 +32,0 @@ * @returns a cleaned object

@@ -29,6 +29,7 @@ /**

}
catch (e) {
catch (_e) {
return false;
}
};
// export function flattenDeep<T>(array: T[]): T[];
/**

@@ -35,0 +36,0 @@ *

@@ -24,3 +24,3 @@ import { HttpProxyAgent, HttpsProxyAgent } from "hpagent";

}
catch (e) {
catch (_err) {
throw new TypeError(`Invalid options: ${JSON.stringify(options)}`);

@@ -115,3 +115,3 @@ }

else {
const domain = gotOptions.url.match(/^(\w+):\/\/([^\/]+)/);
const domain = gotOptions.url.match(/^(\w+):\/\/([^/]+)/);
if (domain)

@@ -118,0 +118,0 @@ gotOptions.headers.referer = domain[0];

@@ -9,6 +9,4 @@ import { multiPriorityQueue } from "../lib/index.js";

this.priorityLevels = priorityLevels;
this.defaultPriority = Number.isInteger(defaultPriority)
? defaultPriority
: Math.floor(this.priorityLevels / 2);
this.defaultPriority >= priorityLevels ? priorityLevels - 1 : defaultPriority;
this.defaultPriority = Number(defaultPriority);
this.defaultPriority = Number.isInteger(defaultPriority) ? Math.min(Math.max(defaultPriority, 0), priorityLevels - 1) : Math.floor(priorityLevels / 2);
this.nextRequestTime = Date.now();

@@ -15,0 +13,0 @@ this._waitingTasks = new multiPriorityQueue(priorityLevels);

@@ -110,4 +110,4 @@ export type GlobalOnlyOptions = {

*/
jar?: Object;
cookieJar?: Object;
jar?: object;
cookieJar?: object;
/**

@@ -114,0 +114,0 @@ * @description If true, the crawler will parse the response body as JSON.

{
"name": "crawler",
"version": "2.0.0-beta.5",
"description": "New TypeScript Crawler Test version",
"version": "2.0.0",
"description": "Crawler is a ready-to-use web spider that works with proxies, asynchrony, rate limit, configurable request pools, jQuery, and seamless HTTP/2 support.",
"repository": {

@@ -11,13 +11,30 @@ "type": "git",

"scripts": {
"test": "npx mocha --timeout=15000 tests/*.test.js"
"build": "tsc",
"test": "mocha"
},
"engines": {
"node": ">=16.20.2"
"node": ">=18"
},
"type": "module",
"keywords": [],
"keywords": [
"javascript",
"crawler",
"spider",
"scraper",
"scraping",
"jquery",
"nodejs",
"http",
"https",
"http2",
"got",
"request",
"url",
"network",
"gzip"
],
"license": "MIT",
"dependencies": {
"cheerio": "1.0.0-rc.12",
"got": "^13.0.0",
"got": "^14.4.0",
"hpagent": "^1.2.0",

@@ -30,5 +47,8 @@ "http2-wrapper": "^2.2.1",

"devDependencies": {
"@eslint/js": "^9.4.0",
"@types/got": "^9.6.12",
"@types/node": "^20.10.6",
"chai": "^5.1.0",
"@types/node": "^20.14.2",
"chai": "^5.1.1",
"eslint": "~9.4.0",
"globals": "^15.3.0",
"mocha": "^10.4.0",

@@ -39,4 +59,13 @@ "mocha-testdata": "^1.2.0",

"tough-cookie": "^4.1.4",
"tsx": "^4.7.3"
"tsx": "^4.11.2",
"typescript": "^5.4.5",
"typescript-eslint": "8.0.0-alpha.27"
},
"mocha": {
"files": [
"test/**/*.js"
],
"timeout": 15000,
"exit": true
}
}

@@ -23,3 +23,3 @@ <p align="center">

- Server-side DOM & automatic jQuery insertion with Cheerio (default) or JSDOM,
- Server-side DOM & automatic jQuery insertion with Cheerio (default),
- Configurable pool size and retries,

@@ -36,4 +36,4 @@ - Control rate limit,

Requires Node.js 16 or above
Requires Node.js 18 or above
**IMPORTANT:** If you are using a Linux OS, we currently recommend sticking with Node.js version 18 for the time being, rather than opting for higher versions (even if some dependencies suggest 20 or later). Our unit tests have encountered stability issues on Linux with higher versions of Node.js, which may be caused by more profound underlying reasons. However, at present, we do not have the resources to address these issues.
```sh

@@ -482,3 +482,3 @@ $ npm install crawler

- Same as the options of [got options](https://github.com/sindresorhus/got/blob/main/documentation/2-options.md)
- Same as the options of [options](https://github.com/sindresorhus/got/blob/main/documentation/2-options.md)

@@ -629,3 +629,3 @@ #### `forceUTF8`

`options.json` → `options.isJson` (Boolean. The "json" option is now work completely different in Got.)
`options.json` → `options.isJson` (Boolean. The "json" option is now work completely different.)

@@ -643,5 +643,2 @@ `options.limiter` → `options.rateLimiterId`

### Origin Request Options
*Since we have switched from `request` to `got`, the following option names have been updated accordingly.*
`incomingEncoding` → `encoding`

@@ -668,3 +665,3 @@

- use “jquery/JQuery/..." => **Only "jQuery" will be accepted.**
- use "body" as the POST form => **Please use "form" instead. For more, see [got options](https://github.com/sindresorhus/got/blob/main/documentation/2-options.md) .**
- use "body" as the POST form => **Please use "form" instead. For more, see [options](https://github.com/sindresorhus/got/blob/main/documentation/2-options.md) .**
- add custom options on request options => **Not allowed. Only options.userParams could pass through the response.**

@@ -671,0 +668,0 @@ - We are temporarily no longer supporting jsdom for certain reasons.

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc