crawler-ninja
Advanced tools
Comparing version 0.1.15 to 0.1.16
@@ -146,5 +146,6 @@ module.exports.list = function () { | ||
"zanox", | ||
"ovh" | ||
"ovh", | ||
"webadmax.com" | ||
]; | ||
} |
@@ -7,3 +7,3 @@ var timers = require('timers'); | ||
var requester = require("./lib/queue-requester"); | ||
var URI = require('./lib/uri.js'); | ||
var URI = require('crawler-ninja-uri'); | ||
var html = require("./lib/html.js"); | ||
@@ -84,2 +84,4 @@ var store = require("./lib/store/store.js"); | ||
* - proxyList : the list of proxies (see the project simple-proxies on npm) | ||
* - storeModuleName : the npm module name used for the store implementation, by default memory-store | ||
* - storeParams : the params to pass to the store module when creating it. | ||
* | ||
@@ -86,0 +88,0 @@ * + all options provided by nodejs request : https://github.com/request/request |
@@ -73,3 +73,3 @@ /** | ||
})//.setMaxListeners(0); | ||
}).setMaxListeners(0); | ||
}; | ||
@@ -76,0 +76,0 @@ |
@@ -10,3 +10,3 @@ /** | ||
var log = require("crawler-ninja-logger").Logger; | ||
var URI = require("./uri.js"); | ||
var URI = require("crawler-ninja-uri"); | ||
var request = require("./http-request.js"); | ||
@@ -301,2 +301,3 @@ var store = require("../lib/store/store.js"); | ||
log.error({"url" : options.url, "step" : "queue-requester.onRequestError", "message" : "unmanaged error", options : error}); | ||
// For the other kind of errors, just inform the crawler | ||
@@ -392,3 +393,3 @@ requestQueue.onCrawl(error, result, function(error) { | ||
/** | ||
* Recrawl an url if the maximum of retries is no yet fetch | ||
* Recrawl a URL if the maximum number of retries has not yet been reached | ||
* | ||
@@ -395,0 +396,0 @@ * |
@@ -9,3 +9,3 @@ /** | ||
var Map = require("collections/fast-map"); | ||
var URI = require('../uri.js'); | ||
var URI = require('crawler-ninja-uri'); | ||
@@ -84,3 +84,3 @@ | ||
Store.prototype.isInCrawlHistory = function(url, callback) { | ||
/* TODO : | ||
/* TODO : | ||
process.nextTick(function() { | ||
@@ -87,0 +87,0 @@ callback(...); |
{ | ||
"name": "crawler-ninja", | ||
"version": "0.1.15", | ||
"version": "0.1.16", | ||
"description": "A web crawler made for the SEO based on plugins. Please wait or contribute ... still in beta", | ||
@@ -11,7 +11,7 @@ "main": "index.js", | ||
"dependencies": { | ||
"URIjs": "*", | ||
"crawler-ninja-uri" : "*", | ||
"crawler-ninja-logger": "*", | ||
"async": " *", | ||
"cheerio": "*", | ||
"collections": "*", | ||
"crawler-ninja-logger": "*", | ||
"crypto": "*", | ||
@@ -18,0 +18,0 @@ "request": "*", |
var crypto = require('crypto'); | ||
var Map = require("collections/fast-map"); | ||
var Set = require("collections/fast-set"); | ||
var URI = require('../lib/uri.js'); | ||
var URI = require('crawler-ninja-uri'); | ||
@@ -6,0 +6,0 @@ var CONTENT_TYPE_HEADER = "content-type"; |
@@ -5,3 +5,3 @@ /** | ||
*/ | ||
var URI = require('URIjs'); | ||
var URI = require('crawler-ninja-uri'); | ||
@@ -52,6 +52,4 @@ | ||
this.addHostname(URI.host(result.uri)); | ||
var uri = URI(result.uri); | ||
this.addHostname(uri.hostname()); | ||
if ($) { | ||
@@ -58,0 +56,0 @@ this.data.numberOfHTMLs++; |
@@ -25,3 +25,3 @@ Crawler Ninja | ||
Then accept the license & rerun : | ||
$ npm install crawler-ninja --save (sudo is not required) | ||
$ npm install crawler-ninja --save (sudo is not required, in theory) | ||
@@ -56,11 +56,10 @@ Crash course | ||
If you want to use the default options (see below), you can pass null as the first argument of the init method. | ||
You can also reduce the scope of the crawl by using the different crawl options (see below the section : option references). | ||
## Create a new plugin | ||
The following code show you the events callbacks that your have to implement for creating a new plugin. | ||
The following code shows the functions that you have to implement to create a new plugin. | ||
This is not mandatory to implement all plugin functions. You can also reduce the scope of the crawl by using the different crawl options (see below the section : option references). | ||
It is not mandatory to implement all plugin functions. | ||
```javascript | ||
@@ -177,3 +176,3 @@ function Plugin() { | ||
- links : if true crawl link tags, default = true. | ||
- linkTypes : the type of the links tags to crawl (match to the rel attribute), default = ["canonical", "stylesheet"]. | ||
- linkTypes : the type of the links tags to crawl (match to the rel attribute), default = ["canonical", "stylesheet", "icon"]. | ||
- images : if true crawl images, default = true. | ||
@@ -299,3 +298,3 @@ - depthLimit : the depth limit for the crawl, default is no limit. | ||
``` | ||
The previous code init the crawler with a dedug level. | ||
The previous code initializes the crawler with a debug level. If you don't use proxies, set the proxyList argument to null. | ||
@@ -389,2 +388,6 @@ | ||
Utilities | ||
--------- | ||
- See on npm the module "crawler-ninja-uri" that can be used for extracting info and transforming URLs | ||
Current Plugins | ||
@@ -482,1 +485,5 @@ --------------- | ||
- Review README.md | ||
0.1.16 | ||
- Externalize URI.js in order to use it in Stores and plugins. | ||
- Review log info on error |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Wildcard dependency
Quality: Package has a dependency with a floating version range. This can cause issues if the dependency publishes a new major version.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Wildcard dependency
Quality: Package has a dependency with a floating version range. This can cause issues if the dependency publishes a new major version.
Found 1 instance in 1 package
484
8217454
79
10992
+ Added crawler-ninja-uri@*
+ Added axios@1.7.7 (transitive)
+ Added crawler-ninja-uri@1.0.4 (transitive)
+ Added urijs@1.19.11 (transitive)
- Removed URIjs@*
- Removed URIjs@1.16.1 (transitive)
- Removed axios@1.7.8 (transitive)