New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

simplecrawler

Package Overview
Dependencies
Maintainers
2
Versions
70
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

simplecrawler - npm Package Compare versions

Comparing version 0.2.6 to 0.2.7

42

lib/crawler.js

@@ -571,23 +571,20 @@ // Simplecrawler

if (crawler.domainValid(parsedURL.host)) {
try {
crawler.queue.add(
parsedURL.protocol,
parsedURL.host,
parsedURL.port,
parsedURL.path,
function queueAddCallback(error,newQueueItem) {
if (error) {
// We received an error condition when adding the callback
crawler._emitSpecial("queueerror",error,parsedURL);
} else {
crawler._emitSpecial("queueadd",newQueueItem,parsedURL);
newQueueItem.referrer = queueItem ? queueItem.url : null;
}
crawler.queue.add(
parsedURL.protocol,
parsedURL.host,
parsedURL.port,
parsedURL.path,
function queueAddCallback(error,newQueueItem) {
if (error) {
// We received an error condition when adding the callback
if (error.code && error.code === "DUP")
return crawler._emitSpecial("queueduplicate",parsedURL);
return crawler._emitSpecial("queueerror",error,parsedURL);
}
);
} catch(error) {
// If we caught an error, emit queueerror
crawler._emitSpecial("queueerror",error,parsedURL);
return false;
}
crawler._emitSpecial("queueadd",newQueueItem,parsedURL);
newQueueItem.referrer = queueItem ? queueItem.url : null;
}
);
}

@@ -658,2 +655,7 @@

};
// If port is one of the HTTP/HTTPS defaults, delete the option to avoid conflicts
if (requestOptions.port === 80 || requestOptions.port === 443) {
delete requestOptions.port;
}

@@ -660,0 +662,0 @@ // Add cookie header from cookie jar if we're configured to

@@ -29,3 +29,3 @@ // Simplecrawler - queue module

var self = this;
// Ensure all variables conform to reasonable defaults

@@ -37,9 +37,9 @@ protocol = protocol === "https" ? "https" : "http";

}
var url = protocol + "://" + domain + (port !== 80 ? ":" + port : "") + path;
this.exists(protocol,domain,port,path,
function(err,exists) {
if (err) return callback(err);
if (!exists) {

@@ -56,7 +56,10 @@ var queueItem = {

};
self.push(queueItem);
callback(null,queueItem);
} else {
callback(new Error("Resource already exists in queue!"));
var error = new Error("Resource already exists in queue!");
error.code = "DUP";
callback(error);
}

@@ -69,5 +72,9 @@ });

callback = callback && callback instanceof Function ? callback : function(){};
var url = (protocol + "://" + domain + (port !== 80 ? ":" + port : "") + path).toLowerCase();
port = (port !== 80 ? ":" + port : "");
var url =
(protocol + "://" + domain + port + path)
.toLowerCase();
if (!!this.scanIndex[url]) {

@@ -85,3 +92,3 @@ callback(null,1);

var self = this;
callback(null,self[self.length-1]);

@@ -94,3 +101,3 @@ };

var self = this;
if (!isNaN(id) && self.length > id) {

@@ -105,3 +112,3 @@ return callback(null,self[id]);

var self = this;
for (var itemIndex = self.oldestUnfetchedIndex; itemIndex < self.length; itemIndex ++) {

@@ -113,3 +120,3 @@ if (self[itemIndex].status === "queued") {

}
callback(new Error("No unfetched items remain."));

@@ -127,3 +134,3 @@ };

}
self.forEach(function(item) {

@@ -134,3 +141,3 @@ if (item.fetched && item.stateData[statisticName] !== null && item.stateData[statisticName] > maxStatisticValue) {

});
callback(null,maxStatisticValue);

@@ -148,3 +155,3 @@ };

}
self.forEach(function(item) {

@@ -168,3 +175,3 @@ if (item.fetched && item.stateData[statisticName] !== null && item.stateData[statisticName] < minStatisticValue) {

}
self.forEach(function(item) {

@@ -176,3 +183,3 @@ if (item.fetched && item.stateData[statisticName] !== null && !isNaN(item.stateData[statisticName])) {

});
callback(null,NumberSum / NumberCount);

@@ -191,3 +198,3 @@ };

});
callback(null,NumberComplete);

@@ -215,3 +222,3 @@ return NumberComplete;

var subqueue = [], self = this;
self.forEach(function(item,index) {

@@ -223,3 +230,3 @@ if (item.status === status) {

});
callback(null,subqueue);

@@ -232,3 +239,3 @@ };

var self = this;
self.countWithStatus("failed",function(err1,failed) {

@@ -245,3 +252,3 @@ self.countWithStatus("notfound",function(err2,notfound) {

var self = this;
// Re-queue in-progress items before freezing...

@@ -263,10 +270,10 @@ self.forEach(function(item) {

var fileData, self = this, defrostedQueue = [];
fs.readFile(filename,function(err,fileData) {
if (err) return callback(err);
if (!fileData.toString("utf8").length) {
return callback(new Error("Failed to defrost queue from zero-length JSON."));
}
try {

@@ -277,3 +284,3 @@ defrostedQueue = JSON.parse(fileData.toString("utf8"));

}
for (var index in defrostedQueue) {

@@ -285,5 +292,5 @@ if (defrostedQueue.hasOwnProperty(index) && !isNaN(index)) {

}
callback(null,self);
});
};
{
"name": "simplecrawler",
"description": "Very straigntforward web crawler. Uses EventEmitter. Generates queue statistics and has a basic cache mechanism with extensible backend.",
"version": "0.2.6",
"version": "0.2.7",
"homepage": "http://github.com/cgiffard/node-simplecrawler",

@@ -6,0 +6,0 @@ "author": "Christopher Giffard <christopher.giffard@cgiffard.com>",

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc