crawler-ninja - npm Package Compare versions

Comparing version 0.1.15 to 0.1.16


default-lists/domain-black-list.js

@@ -146,5 +146,6 @@ module.exports.list = function () {
   "zanox",
-  "ovh"
+  "ovh",
+  "webadmax.com"
 ];
 }
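
Below, a minimal hedged sketch of how a domain black-list like this one is typically consumed: skip any URL whose host matches a listed entry. The `isBlackListed` helper is illustrative, not part of crawler-ninja's API.

```javascript
// Hedged sketch: consuming the black-list module changed in this hunk.
// isBlackListed is a hypothetical helper, not crawler-ninja API.
var list = require("./default-lists/domain-black-list.js").list();

function isBlackListed(host) {
  return list.some(function (entry) {
    return host.indexOf(entry) !== -1;
  });
}

console.log(isBlackListed("www.webadmax.com")); // true as of 0.1.16
console.log(isBlackListed("www.example.com"));  // false
```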

@@ -7,3 +7,3 @@ var timers = require('timers');
 var requester = require("./lib/queue-requester");
-var URI = require('./lib/uri.js');
+var URI = require('crawler-ninja-uri');
 var html = require("./lib/html.js");

@@ -84,2 +84,4 @@ var store = require("./lib/store/store.js");
 * - proxyList : the list of proxies (see the project simple-proxies on npm)
+* - storeModuleName : the npm nodule name used for the store implementation, by default memory-store
+* - storeParams : the params to pass to the store module when create it.
 *

@@ -86,0 +88,0 @@ * + all options provided by nodejs request : https://github.com/request/request
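
A hedged sketch of how the two new store options might be passed to `init`, based only on the option names documented in this hunk; the surrounding call shape is assumed from the README's description of `init`, not shown in this diff.

```javascript
// Hedged sketch: wiring the store options documented above into init.
// "memory-store" as the default comes from the comment in this hunk;
// the option names are real, the call shape is illustrative.
var crawler = require("crawler-ninja");

crawler.init({
  storeModuleName : "memory-store", // npm module implementing the store
  storeParams     : {}              // params handed to the store at creation
}, function () {
  console.log("Crawl finished");
});
```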

@@ -73,3 +73,3 @@ /**
-})//.setMaxListeners(0);
+}).setMaxListeners(0);
 };
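
Context for this change: Node's EventEmitter warns once more than ten listeners are attached to a single event, and `setMaxListeners(0)` removes that cap, which a plugin-driven crawler can hit quickly. A standalone illustration:

```javascript
// EventEmitter warns after 10 listeners on one event; setMaxListeners(0)
// lifts the limit, as the re-enabled call above now does.
var EventEmitter = require("events").EventEmitter;

var emitter = new EventEmitter().setMaxListeners(0);
for (var i = 0; i < 50; i++) {
  emitter.on("crawl", function () {}); // no "possible memory leak" warning
}
console.log(emitter.listeners("crawl").length); // 50
```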

@@ -76,0 +76,0 @@

@@ -10,3 +10,3 @@ /**
 var log = require("crawler-ninja-logger").Logger;
-var URI = require("./uri.js");
+var URI = require("crawler-ninja-uri");
 var request = require("./http-request.js");

@@ -301,2 +301,3 @@ var store = require("../lib/store/store.js");
+log.error({"url" : options.url, "step" : "queue-requester.onRequestError", "message" : "unmanaged error", options : error});
 // For the other kind of errors, just inform the crawler
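
The added line follows the structured-logging pattern used across the crawler: a plain object with url/step/message fields handed to the shared Logger from crawler-ninja-logger (the require visible at the top of this file).

```javascript
// The structured-logging pattern visible in this hunk, in isolation.
var log = require("crawler-ninja-logger").Logger;

log.error({
  url     : "http://www.example.com/",
  step    : "queue-requester.onRequestError",
  message : "unmanaged error"
});
```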

@@ -392,3 +393,3 @@ requestQueue.onCrawl(error, result, function(error) {
 /**
-* Recrawl an url if the maximum of retries is no yet fetch
+* Recrawl an url if the maximum of retries is no yet fetched
 *

@@ -395,0 +396,0 @@ *
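
The comment describes the requeue-on-error path: a URL is crawled again until its retry counter reaches the configured maximum. A hedged sketch of that logic, with illustrative names rather than crawler-ninja internals:

```javascript
// Hedged sketch: requeue a failed URL until the maximum number of
// retries is reached. All names here are illustrative.
function recrawl(task, maxRetries, queue) {
  task.retries = (task.retries || 0) + 1;
  if (task.retries <= maxRetries) {
    queue.push(task); // try the same URL again
  }
}
```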

@@ -9,3 +9,3 @@ /**
 var Map = require("collections/fast-map");
-var URI = require('../uri.js');
+var URI = require('crawler-ninja-uri');

@@ -84,3 +84,3 @@
 Store.prototype.isInCrawlHistory = function(url, callback) {
-/* TODO :
+/* TODO :
 process.nextTick(function() {

@@ -87,0 +87,0 @@ callback(...);
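
The TODO points at keeping the store API asynchronous even for the in-memory implementation: the lookup itself is synchronous, so the callback is deferred with `process.nextTick`. A hedged sketch, with the Set-backed history being an assumption:

```javascript
// Hedged sketch of the TODO above: an in-memory history check that stays
// callback-based by deferring with process.nextTick. The Set-backed
// history is an assumption, not the module's actual field.
var Set = require("collections/fast-set");

var history = new Set();
history.add("http://www.example.com/");

function isInCrawlHistory(url, callback) {
  process.nextTick(function () {
    callback(null, history.has(url));
  });
}

isInCrawlHistory("http://www.example.com/", function (error, found) {
  console.log(found); // true
});
```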

 {
   "name": "crawler-ninja",
-  "version": "0.1.15",
+  "version": "0.1.16",
   "description": "A web crawler made for the SEO based on plugins. Please wait or contribute ... still in beta",

@@ -11,7 +11,7 @@ "main": "index.js",
   "dependencies": {
-    "URIjs": "*",
+    "crawler-ninja-uri" : "*",
+    "crawler-ninja-logger": "*",
     "async": " *",
     "cheerio": "*",
     "collections": "*",
-    "crawler-ninja-logger": "*",
     "crypto": "*",

@@ -18,0 +18,0 @@ "request": "*",

 var crypto = require('crypto');
 var Map = require("collections/fast-map");
 var Set = require("collections/fast-set");
-var URI = require('../lib/uri.js');
+var URI = require('crawler-ninja-uri');

@@ -6,0 +6,0 @@ var CONTENT_TYPE_HEADER = "content-type";

@@ -5,3 +5,3 @@ /**
 */
-var URI = require('URIjs');
+var URI = require('crawler-ninja-uri');

@@ -52,6 +52,4 @@
-var uri = URI(result.uri);
-this.addHostname(uri.hostname());
+this.addHostname(URI.host(result.uri));
 if ($) {

@@ -58,0 +56,0 @@ this.data.numberOfHTMLs++;
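
This hunk shows the concrete API difference behind the dependency swap: URIjs wraps the URL in an object and calls `.hostname()`, while the code now calls a `host()` helper on the crawler-ninja-uri module directly. A hedged side-by-side, with the crawler-ninja-uri call taken from the added line above:

```javascript
// Side-by-side of the change above. URIjs(link).hostname() is standard
// URIjs API; URI.host(link) mirrors the new call in this hunk.
var URIjs = require("URIjs");             // old dependency
var URI   = require("crawler-ninja-uri"); // new dependency

var link = "http://www.example.com/page.html";
console.log(URIjs(link).hostname()); // "www.example.com"
console.log(URI.host(link));         // host of the link, per the new code
```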

@@ -25,3 +25,3 @@ Crawler Ninja
 Then accept the license & rerun :
-$ npm install crawler-ninja --save (sudo is not required)
+$ npm install crawler-ninja --save (sudo is not required, in theory)

@@ -56,11 +56,10 @@ Crash course
 If you want used the default options (see below), you can pass null in the first argument of the method init.
+You can also reduce the scope of the crawl by using the different crawl options (see below the section : option references).
 ## Create a new plugin
-The following code show you the events callbacks that your have to implement for creating a new plugin.
+The following code show you the functions that your have to implement for creating a new plugin.
-This is not mandatory to implement all plugin functions. You can also reduce the scope of the crawl by using the different crawl options (see below the section : option references).
+This is not mandatory to implement all plugin functions.
 ```javascript
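
For readers following this README change, a hedged plugin skeleton built around the `function Plugin()` constructor visible in the next hunk; the method names are assumptions drawn from the crawler's plugin docs, not from this diff, and the README notes that implementing every function is not mandatory.

```javascript
// Hedged plugin skeleton; crawl/error as method names are assumptions.
function Plugin() {
  this.name = "my-plugin";
}

// Called for each crawled resource; $ is the cheerio handle for HTML pages.
Plugin.prototype.crawl = function (result, $, callback) {
  callback();
};

// Called when a request ends in error.
Plugin.prototype.error = function (error, result, callback) {
  callback();
};

module.exports.Plugin = Plugin;
```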

@@ -177,3 +176,3 @@ function Plugin() {
 - links : if true crawl link tags, default = true.
-- linkTypes : the type of the links tags to crawl (match to the rel attribute), default = ["canonical", "stylesheet"].
+- linkTypes : the type of the links tags to crawl (match to the rel attribute), default = ["canonical", "stylesheet", "icon"].
 - images : if true crawl images, default = true.

@@ -299,3 +298,3 @@ - depthLimit : the depth limit for the crawl, default is no limit.
 ```
-The previous code init the crawler with a dedug level.
+The previous code init the crawler with a dedug level. If you don't use proxies, set the proxyList argument to null.
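
The added sentence refers to init's proxy argument. A hedged sketch of the call it describes, with the argument order assumed from the README text rather than verified against the source:

```javascript
// Hedged sketch: null options selects the defaults, and a null proxyList
// means no proxies. The signature is assumed from the README sentence above.
var crawler = require("crawler-ninja");

crawler.init(null, function () {
  console.log("End of the crawl");
}, null /* proxyList : not using proxies */);
```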

@@ -389,2 +388,6 @@
+Utilities
+---------
+- See on npm the module "crawler-ninja-uri" that can be used for extracting info and transforming URLs
 Current Plugins

@@ -482,1 +485,5 @@ ---------------
 - Review README.md
+0.1.16
+- Externalize URI.js in order to used it into Stores and plugins.
+- Review log info on error