Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

robotstxt

Package Overview
Dependencies
Maintainers
1
Versions
17
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

robotstxt - npm Package Compare versions

Comparing version 0.0.4-6 to 0.0.4-7

196

index.js
(function() {
// Module-level names and small helper functions used throughout the file.
// This listing is a version comparison, so the declarations are printed twice.
// NOTE(review): the single chained `var` statement looks like one version and
// the two `var` statements that follow like the other — confirm against the
// published package before relying on either form.
//
// __bind(fn, me)            returns a wrapper that calls fn with `this` fixed to me.
// __hasProp                 alias for Object.prototype.hasOwnProperty.
// __extends(child, parent)  copies parent's own enumerable properties onto child,
//                           chains child.prototype to parent.prototype through an
//                           intermediate constructor, stores parent.prototype as
//                           child.__super__, and returns child.
var EventEmitter, GateKeeper, RobotsTxt, createRobotsTxt, parseUri, _,
__bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
__hasProp = Object.prototype.hasOwnProperty,
__extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor; child.__super__ = parent.prototype; return child; };
var EventEmitter, GateKeeper, RobotsTxt, createRobotsTxt, parseUri, _;
var __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, __hasProp = Object.prototype.hasOwnProperty, __extends = function(child, parent) {
// Copy only the parent's own properties (inherited ones are skipped).
for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; }
// Intermediate constructor: lets child.prototype inherit from parent.prototype
// without invoking the parent constructor itself.
function ctor() { this.constructor = child; }
ctor.prototype = parent.prototype;
child.prototype = new ctor;
// Keep a reference to the parent prototype on the child.
child.__super__ = parent.prototype;
return child;
};
EventEmitter = require('events').EventEmitter;
parseUri = require('./lib/parseuri.js');
_ = require("underscore");
_.str = require('underscore.string');
_.mixin(_.str.exports());
RegExp.specialEscape = function(str) {

@@ -22,5 +21,3 @@ var specials;

};
GateKeeper = (function() {
function GateKeeper(user_agent) {

@@ -37,2 +34,4 @@ this.getCrawlDelay = __bind(this.getCrawlDelay, this);

this.groups = {};
this.sitemaps = [];
this.comments = [];
this.user_agent_group = {

@@ -42,11 +41,11 @@ '*': '*'

}
GateKeeper.prototype.isAllowed = function(url, _allowed) {
var a, check, matchO, prio, r, _i, _len,
_this = this;
if (_allowed == null) _allowed = true;
var a, check, matchO, prio, r, _i, _len;
if (_allowed == null) {
_allowed = true;
}
a = this.whatsUp(url);
r = true;
prio = 0;
check = function(matchO) {
check = __bind(function(matchO) {
if (matchO) {

@@ -65,3 +64,3 @@ if (matchO.type === 'disallow') {

}
};
}, this);
for (_i = 0, _len = a.length; _i < _len; _i++) {

@@ -77,20 +76,15 @@ matchO = a[_i];

};
// Convenience wrapper: evaluates `url` through isAllowed with the second
// (`_allowed`) argument set to false and hands back isAllowed's result.
GateKeeper.prototype.isDisallowed = function(url) {
  var allowedFlag = false;
  var verdict = this.isAllowed(url, allowedFlag);
  return verdict;
};
// Runs every rule of the current user agent's group against a URL.
// The URL is first passed through this.cleanUrl and the group key comes from
// this.getGroup(); the return value is the array produced by calling each entry
// of this.groups[group].rules with the cleaned URL.
// NOTE(review): this listing is a version comparison, so several statements
// appear in two forms (a `_this = this` / plain-callback form and a
// `__bind(..., this)` form) — presumably before and after. As printed, the two
// forms are interleaved and do not parse as a single function.
GateKeeper.prototype.whatsUp = function(url) {
var group, r,
_this = this;
var group, r;
url = this.cleanUrl(url);
group = this.getGroup();
return r = this.groups[group].rules.map(function(e) {
return r = this.groups[group].rules.map(__bind(function(e) {
// Each entry e is invoked as a function with the cleaned URL.
return e(url);
});
}, this));
};
GateKeeper.prototype.why = function(url) {
var a, conflict, matchO, r, ra, test, _i, _len,
_this = this;
var a, conflict, matchO, r, ra, test, _i, _len;
url = this.cleanUrl(url);

@@ -100,3 +94,3 @@ a = this.whatsUp(url);

conflict = false;
test = function(matchO) {
test = __bind(function(matchO) {
if (matchO) {

@@ -112,3 +106,3 @@ if (!ra[0]) {

} else if (matchO.priority === ra[0].priority) {
if (matchO.type === r[0].type) {
if (matchO.type === ra[0].type) {
return ra.push(matchO);

@@ -121,3 +115,3 @@ } else {

}
};
}, this);
for (_i = 0, _len = a.length; _i < _len; _i++) {

@@ -137,3 +131,2 @@ matchO = a[_i];

};
GateKeeper.prototype.cleanUrl = function(url) {

@@ -154,10 +147,10 @@ var xu;

};
// Stores the lower-cased form of `user_agent` on this.user_agent and returns
// that lower-cased string.
GateKeeper.prototype.setUserAgent = function(user_agent) {
  var lowered = user_agent.toLowerCase();
  this.user_agent = lowered;
  return lowered;
};
GateKeeper.prototype.getGroup = function(user_agent) {
var k, key, keymatch, rkey, value, _ref;
if (user_agent == null) user_agent = this.user_agent;
if (user_agent == null) {
user_agent = this.user_agent;
}
user_agent = user_agent.toLowerCase();

@@ -173,3 +166,7 @@ if (this.user_agent_group[user_agent]) {

keymatch = user_agent.match(new RegExp(rkey));
if (keymatch) if (key.length > k.length) k = key;
if (keymatch) {
if (key.length > k.length) {
k = key;
}
}
}

@@ -180,26 +177,21 @@ this.user_agent_group[user_agent] = k;

};
// Returns the crawl delay for a user agent as a Number, or undefined when
// none is recorded.
//
// user_agent  optional; defaults to this.user_agent. It is lower-cased and
//             used directly as a key into this.groups.
//
// Lookup order: the agent's own group first, then the '*' group. A falsy
// crawl_delay on the agent's group falls through to the '*' group.
//
// Both group lookups are null-safe. Previously a missing '*' group raised a
// TypeError whenever the agent's own group had no delay; now the method
// simply returns undefined in that case.
GateKeeper.prototype.getCrawlDelay = function(user_agent) {
  var agentGroup, wildcardGroup, delay;
  if (user_agent == null) {
    user_agent = this.user_agent;
  }
  user_agent = user_agent.toLowerCase();
  agentGroup = this.groups[user_agent];
  wildcardGroup = this.groups['*'];
  delay = (agentGroup != null ? agentGroup.crawl_delay : void 0) ||
    (wildcardGroup != null ? wildcardGroup.crawl_delay : void 0);
  if (delay != null) {
    return Number(delay);
  }
};
return GateKeeper;
})();
RobotsTxt = (function(_super) {
RobotsTxt = (function() {
var rm, txt, txtA;
__extends(RobotsTxt, _super);
__extends(RobotsTxt, EventEmitter);
txt = '';
txtA = [];
rm = RobotsTxt;
function RobotsTxt(url, user_agent) {

@@ -215,12 +207,22 @@ this.url = url;

}
RobotsTxt.prototype.crawl = function(protocol, host, port, path, user_agent, encoding) {
var handler, options, req,
_this = this;
if (protocol == null) protocol = this.uri.protocol;
if (host == null) host = this.uri.host;
if (port == null) port = this.uri.port;
if (path == null) path = this.uri.path;
if (user_agent == null) user_agent = this.user_agent;
if (encoding == null) encoding = 'utf8';
var handler, options, req;
if (protocol == null) {
protocol = this.uri.protocol;
}
if (host == null) {
host = this.uri.host;
}
if (port == null) {
port = this.uri.port;
}
if (path == null) {
path = this.uri.path;
}
if (user_agent == null) {
user_agent = this.user_agent;
}
if (encoding == null) {
encoding = 'utf8';
}
txt = '';

@@ -236,31 +238,31 @@ txtA = [];

};
req = handler.request(options, function(res) {
req = handler.request(options, __bind(function(res) {
var _ref;
if ((200 <= (_ref = res.statusCode) && _ref < 300)) {
res.setEncoding(encoding);
res.on("data", function(chunk) {
res.on("data", __bind(function(chunk) {
return txtA.push(chunk);
});
res.on("end", function() {
}, this));
res.on("end", __bind(function() {
txt = txtA.join('');
_this.emit("crawled", txt);
return _this.parse(txt);
});
this.emit("crawled", txt);
return this.parse(txt);
}, this));
return null;
} else {
return _this.emit("error", new Error('invalid status code - is: HTTP ' + res.statusCode + ' - should: HTTP 200'));
return this.emit("error", new Error('invalid status code - is: HTTP ' + res.statusCode + ' - should: HTTP 200'));
}
});
}, this));
req.setHeader("User-Agent", user_agent);
req.end();
null;
return req.on('error', function(e) {
return _this.emit("error", e);
});
return req.on('error', __bind(function(e) {
return this.emit("error", e);
}, this));
};
RobotsTxt.prototype.parse = function(txt) {
var currUserAgentGroup, evaluate, groupGroupsA, line, lineA, line_counter, myGateKeeper, _i, _len,
_this = this;
if (txt == null) txt = txt;
var currUserAgentGroup, evaluate, groupGroupsA, line, lineA, line_counter, myGateKeeper, _i, _len;
if (txt == null) {
txt = txt;
}
lineA = txt.split("\n");

@@ -270,4 +272,4 @@ myGateKeeper = void 0;

groupGroupsA = [];
evaluate = function(line, nr) {
var doublepoint, groupname, kvA, regExStr, rx, _i, _len, _ref, _results;
evaluate = __bind(function(line, nr) {
var doublepoint, groupname, kvA, regExStr, rule, rx, _i, _len, _ref, _results;
line = _.trim(line);

@@ -282,3 +284,5 @@ if (!_(line).startsWith('#')) {

}
if (kvA.length !== 2) return false;
if (kvA.length !== 2) {
return false;
}
kvA = kvA.map(function(i) {

@@ -291,3 +295,3 @@ return _(i).trim();

delete myGateKeeper;
myGateKeeper = new GateKeeper(_this.user_agent);
myGateKeeper = new GateKeeper(this.user_agent);
}

@@ -313,3 +317,8 @@ if ((currUserAgentGroup != null ? (_ref = currUserAgentGroup.rules) != null ? _ref.length : void 0 : void 0) === 0) {

}
} else if (kvA[0] === 'sitemap') {} else if (kvA[0] === 'crawl-delay') {
} else if (kvA[0] === 'sitemap') {
return myGateKeeper.sitemaps.push({
line: line,
linenumber: nr
});
} else if (kvA[0] === 'crawl-delay') {
if (currUserAgentGroup) {

@@ -323,3 +332,5 @@ return currUserAgentGroup.crawl_delay = kvA[1];

} else {
if (regExStr[0] !== '/') regExStr = '/' + regExStr;
if (regExStr[0] !== '/') {
regExStr = '/' + regExStr;
}
regExStr = RegExp.specialEscape(regExStr);

@@ -335,3 +346,3 @@ regExStr = regExStr.replace(/\*/g, '.*');

if (currUserAgentGroup) {
return currUserAgentGroup.rules.push(function(url) {
rule = function(url) {
var r, url_match;

@@ -358,3 +369,12 @@ if (url) {

}
});
};
rule.about = {
line: line,
linenumber: nr,
priority: kvA[1].length,
type: kvA[0],
rule: kvA[1],
regexstr: regExStr
};
return currUserAgentGroup.rules.push(rule);
}

@@ -365,5 +385,8 @@ }

} else {
return myGateKeeper.comments.push({
line: line,
linenumber: nr
});
}
};
}, this);
line_counter = 0;

@@ -380,7 +403,4 @@ for (_i = 0, _len = lineA.length; _i < _len; _i++) {

};
return RobotsTxt;
})(EventEmitter);
})();
createRobotsTxt = function(url, user_agent) {

@@ -392,5 +412,3 @@ if (user_agent == null) {

};
module.exports = createRobotsTxt;
}).call(this);

@@ -19,3 +19,3 @@ {

"main" : "./index.js",
"version" : "0.0.4-6"
"version" : "0.0.4-7"
}

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc