New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

simplecrawler

Package Overview
Dependencies
Maintainers
2
Versions
70
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

simplecrawler - npm Package Compare versions

Comparing version 0.0.9 to 0.0.10

42

index.js

@@ -7,11 +7,15 @@ // Simplecrawler

// Queue Dependency
var FetchQueue = require("./queue.js").queue;
var Cache = require("./cache.js").Cache;
var EventEmitter = require('events').EventEmitter;
var http = require("http"),
https = require("https");
var FetchQueue = require("./queue.js").queue,
Cache = require("./cache.js").Cache
MetaInfo = require("./package.json");
var http = require("http"),
https = require("https"),
EventEmitter = require('events').EventEmitter;
// Crawler Constructor
var Crawler = function(host,initialPath,initialPort,interval) {
// SETTINGS TO STUFF WITH (not here! Do it when you create a `new Crawler()`)
// SETTINGS TO STUFF WITH
// (not here! Do it when you create a `new Crawler()`)
// Domain to crawl

@@ -25,7 +29,9 @@ this.host = host || "";

// Internal 'tick' interval for spawning new requests (as long as concurrency is under cap)
// Internal 'tick' interval for spawning new requests
// (as long as concurrency is under cap)
// One request will be spooled per tick, up to the concurrency threshold.
this.interval = interval || 250;
// Maximum request concurrency. Be sensible. Five ties in with node's default maxSockets value.
// Maximum request concurrency. Be sensible. Five ties in with node's
// default maxSockets value.
this.maxConcurrency = 5;

@@ -37,9 +43,13 @@

// User Agent
this.userAgent = "Node/SimpleCrawler 0.0.8 (http://www.github.com/cgiffard/node-simplecrawler)";
this.userAgent
= "Node/" + MetaInfo.name + " " + MetaInfo.version +
" (" + MetaInfo.repository.url + ")";
// Queue for requests - FetchQueue gives us stats and other sugar (but it's basically just an array)
// Queue for requests - FetchQueue gives us stats and other sugar
// (but it's basically just an array)
this.queue = new FetchQueue();
// Do we filter by domain?
// Unless you want to be crawling the entire internet, I would recommend leaving this on!
// Unless you want to be crawling the entire internet, I would
// recommend leaving this on!
this.filterByDomain = true;

@@ -50,3 +60,4 @@

// Treat WWW subdomain the same as the main domain (and don't count it as a separate subdomain)
// Treat WWW subdomain the same as the main domain (and don't count
// it as a separate subdomain)
this.ignoreWWWDomain = true;

@@ -57,3 +68,4 @@

// Use simplecrawler's internal resource discovery function (switch it off if you'd prefer to discover and queue resources yourself!)
// Use simplecrawler's internal resource discovery function (switch it off
// if you'd prefer to discover and queue resources yourself!)
this.discoverResources = true;

@@ -121,5 +133,5 @@

// Check whether we're global, domain-absolute or relative
if (URL.match(/^http(s)?:\/\//i)) {
if (URL.match(/^http(s)?:\/\//i) || URL.match(/^\/\//)) {
// We're global. Try and extract domain and port
split = URL.replace(/^http(s)?:\/\//i,"").split(/\//g);
split = URL.replace(/^(http(s)?:)?\/\//i,"").split(/\//g);
hostData = split[0] && split[0].length ? split[0] : host;

@@ -126,0 +138,0 @@

{
"name": "simplecrawler",
"description": "Very straigntforward web crawler. Uses EventEmitter. Generates queue statistics and has a basic cache mechanism with extensible backend.",
"version": "0.0.9",
"version": "0.0.10",
"homepage": "http://github.com/cgiffard/node-simplecrawler",

@@ -6,0 +6,0 @@ "author": "Christopher Giffard <christopher.giffard@cgiffard.com>",

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc