Comparing version 0.0.4-5 to 0.0.4-6
176
index.js
(function() { | ||
var EventEmitter, GateKeeper, RobotsTxt, createRobotsTxt, parseUri, _; | ||
var __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, __hasProp = Object.prototype.hasOwnProperty, __extends = function(child, parent) { | ||
for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } | ||
function ctor() { this.constructor = child; } | ||
ctor.prototype = parent.prototype; | ||
child.prototype = new ctor; | ||
child.__super__ = parent.prototype; | ||
return child; | ||
}; | ||
var EventEmitter, GateKeeper, RobotsTxt, createRobotsTxt, parseUri, _, | ||
__bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, | ||
__hasProp = Object.prototype.hasOwnProperty, | ||
__extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor; child.__super__ = parent.prototype; return child; }; | ||
EventEmitter = require('events').EventEmitter; | ||
parseUri = require('./lib/parseuri.js'); | ||
_ = require("underscore"); | ||
_.mixin(require('underscore.string')); | ||
_.str = require('underscore.string'); | ||
_.mixin(_.str.exports()); | ||
RegExp.specialEscape = function(str) { | ||
@@ -20,3 +22,5 @@ var specials; | ||
}; | ||
GateKeeper = (function() { | ||
function GateKeeper(user_agent) { | ||
@@ -37,11 +41,11 @@ this.getCrawlDelay = __bind(this.getCrawlDelay, this); | ||
} | ||
GateKeeper.prototype.isAllowed = function(url, _allowed) { | ||
var a, check, matchO, prio, r, _i, _len; | ||
if (_allowed == null) { | ||
_allowed = true; | ||
} | ||
var a, check, matchO, prio, r, _i, _len, | ||
_this = this; | ||
if (_allowed == null) _allowed = true; | ||
a = this.whatsUp(url); | ||
r = true; | ||
prio = 0; | ||
check = __bind(function(matchO) { | ||
check = function(matchO) { | ||
if (matchO) { | ||
@@ -60,3 +64,3 @@ if (matchO.type === 'disallow') { | ||
} | ||
}, this); | ||
}; | ||
for (_i = 0, _len = a.length; _i < _len; _i++) { | ||
@@ -72,15 +76,20 @@ matchO = a[_i]; | ||
}; | ||
GateKeeper.prototype.isDisallowed = function(url) { | ||
return this.isAllowed(url, false); | ||
}; | ||
GateKeeper.prototype.whatsUp = function(url) { | ||
var group, r; | ||
var group, r, | ||
_this = this; | ||
url = this.cleanUrl(url); | ||
group = this.getGroup(); | ||
return r = this.groups[group].rules.map(__bind(function(e) { | ||
return r = this.groups[group].rules.map(function(e) { | ||
return e(url); | ||
}, this)); | ||
}); | ||
}; | ||
GateKeeper.prototype.why = function(url) { | ||
var a, conflict, matchO, r, ra, test, _i, _len; | ||
var a, conflict, matchO, r, ra, test, _i, _len, | ||
_this = this; | ||
url = this.cleanUrl(url); | ||
@@ -90,3 +99,3 @@ a = this.whatsUp(url); | ||
conflict = false; | ||
test = __bind(function(matchO) { | ||
test = function(matchO) { | ||
if (matchO) { | ||
@@ -110,3 +119,3 @@ if (!ra[0]) { | ||
} | ||
}, this); | ||
}; | ||
for (_i = 0, _len = a.length; _i < _len; _i++) { | ||
@@ -126,2 +135,3 @@ matchO = a[_i]; | ||
}; | ||
GateKeeper.prototype.cleanUrl = function(url) { | ||
@@ -142,10 +152,10 @@ var xu; | ||
}; | ||
GateKeeper.prototype.setUserAgent = function(user_agent) { | ||
return this.user_agent = user_agent.toLowerCase(); | ||
}; | ||
GateKeeper.prototype.getGroup = function(user_agent) { | ||
var k, key, keymatch, rkey, value, _ref; | ||
if (user_agent == null) { | ||
user_agent = this.user_agent; | ||
} | ||
if (user_agent == null) user_agent = this.user_agent; | ||
user_agent = user_agent.toLowerCase(); | ||
@@ -161,7 +171,3 @@ if (this.user_agent_group[user_agent]) { | ||
keymatch = user_agent.match(new RegExp(rkey)); | ||
if (keymatch) { | ||
if (key.length > k.length) { | ||
k = key; | ||
} | ||
} | ||
if (keymatch) if (key.length > k.length) k = key; | ||
} | ||
@@ -172,21 +178,26 @@ this.user_agent_group[user_agent] = k; | ||
}; | ||
GateKeeper.prototype.getCrawlDelay = function(user_agent) { | ||
var delay, _ref; | ||
if (user_agent == null) { | ||
user_agent = this.user_agent; | ||
} | ||
if (user_agent == null) user_agent = this.user_agent; | ||
user_agent = user_agent.toLowerCase(); | ||
delay = ((_ref = this.groups[user_agent]) != null ? _ref.crawl_delay : void 0) || this.groups['*'].crawl_delay; | ||
if (delay != null) { | ||
return Number(delay); | ||
} | ||
if (delay != null) return Number(delay); | ||
}; | ||
return GateKeeper; | ||
})(); | ||
RobotsTxt = (function() { | ||
RobotsTxt = (function(_super) { | ||
var rm, txt, txtA; | ||
__extends(RobotsTxt, EventEmitter); | ||
__extends(RobotsTxt, _super); | ||
txt = ''; | ||
txtA = []; | ||
rm = RobotsTxt; | ||
function RobotsTxt(url, user_agent) { | ||
@@ -202,22 +213,12 @@ this.url = url; | ||
} | ||
RobotsTxt.prototype.crawl = function(protocol, host, port, path, user_agent, encoding) { | ||
var handler, options, req; | ||
if (protocol == null) { | ||
protocol = this.uri.protocol; | ||
} | ||
if (host == null) { | ||
host = this.uri.host; | ||
} | ||
if (port == null) { | ||
port = this.uri.port; | ||
} | ||
if (path == null) { | ||
path = this.uri.path; | ||
} | ||
if (user_agent == null) { | ||
user_agent = this.user_agent; | ||
} | ||
if (encoding == null) { | ||
encoding = 'utf8'; | ||
} | ||
var handler, options, req, | ||
_this = this; | ||
if (protocol == null) protocol = this.uri.protocol; | ||
if (host == null) host = this.uri.host; | ||
if (port == null) port = this.uri.port; | ||
if (path == null) path = this.uri.path; | ||
if (user_agent == null) user_agent = this.user_agent; | ||
if (encoding == null) encoding = 'utf8'; | ||
txt = ''; | ||
@@ -233,31 +234,31 @@ txtA = []; | ||
}; | ||
req = handler.request(options, __bind(function(res) { | ||
req = handler.request(options, function(res) { | ||
var _ref; | ||
if ((200 <= (_ref = res.statusCode) && _ref < 300)) { | ||
res.setEncoding(encoding); | ||
res.on("data", __bind(function(chunk) { | ||
res.on("data", function(chunk) { | ||
return txtA.push(chunk); | ||
}, this)); | ||
res.on("end", __bind(function() { | ||
}); | ||
res.on("end", function() { | ||
txt = txtA.join(''); | ||
this.emit("crawled", txt); | ||
return this.parse(txt); | ||
}, this)); | ||
_this.emit("crawled", txt); | ||
return _this.parse(txt); | ||
}); | ||
return null; | ||
} else { | ||
return this.emit("error", new Error('invalid status code - is: HTTP ' + res.statusCode + ' - should: HTTP 200')); | ||
return _this.emit("error", new Error('invalid status code - is: HTTP ' + res.statusCode + ' - should: HTTP 200')); | ||
} | ||
}, this)); | ||
}); | ||
req.setHeader("User-Agent", user_agent); | ||
req.end(); | ||
null; | ||
return req.on('error', __bind(function(e) { | ||
return this.emit("error", e); | ||
}, this)); | ||
return req.on('error', function(e) { | ||
return _this.emit("error", e); | ||
}); | ||
}; | ||
RobotsTxt.prototype.parse = function(txt) { | ||
var currUserAgentGroup, evaluate, groupGroupsA, line, lineA, line_counter, myGateKeeper, _i, _len; | ||
if (txt == null) { | ||
txt = txt; | ||
} | ||
var currUserAgentGroup, evaluate, groupGroupsA, line, lineA, line_counter, myGateKeeper, _i, _len, | ||
_this = this; | ||
if (txt == null) txt = txt; | ||
lineA = txt.split("\n"); | ||
@@ -267,3 +268,3 @@ myGateKeeper = void 0; | ||
groupGroupsA = []; | ||
evaluate = __bind(function(line, nr) { | ||
evaluate = function(line, nr) { | ||
var doublepoint, groupname, kvA, regExStr, rx, _i, _len, _ref, _results; | ||
@@ -274,3 +275,3 @@ line = _.trim(line); | ||
doublepoint = line.indexOf(':'); | ||
if (_(line).includes('#')) { | ||
if (_.str.include(line, '#')) { | ||
kvA = [line.substr(0, doublepoint), line.substr(doublepoint + 1, line.indexOf('#') - (doublepoint + 1))]; | ||
@@ -280,5 +281,3 @@ } else { | ||
} | ||
if (kvA.length !== 2) { | ||
return false; | ||
} | ||
if (kvA.length !== 2) return false; | ||
kvA = kvA.map(function(i) { | ||
@@ -291,3 +290,3 @@ return _(i).trim(); | ||
delete myGateKeeper; | ||
myGateKeeper = new GateKeeper(this.user_agent); | ||
myGateKeeper = new GateKeeper(_this.user_agent); | ||
} | ||
@@ -313,5 +312,3 @@ if ((currUserAgentGroup != null ? (_ref = currUserAgentGroup.rules) != null ? _ref.length : void 0 : void 0) === 0) { | ||
} | ||
} else if (kvA[0] === 'sitemap') { | ||
; | ||
} else if (kvA[0] === 'crawl-delay') { | ||
} else if (kvA[0] === 'sitemap') {} else if (kvA[0] === 'crawl-delay') { | ||
if (currUserAgentGroup) { | ||
@@ -325,5 +322,3 @@ return currUserAgentGroup.crawl_delay = kvA[1]; | ||
} else { | ||
if (regExStr[0] !== '/') { | ||
regExStr = '/' + regExStr; | ||
} | ||
if (regExStr[0] !== '/') regExStr = '/' + regExStr; | ||
regExStr = RegExp.specialEscape(regExStr); | ||
@@ -367,5 +362,5 @@ regExStr = regExStr.replace(/\*/g, '.*'); | ||
} else { | ||
; | ||
} | ||
}, this); | ||
}; | ||
line_counter = 0; | ||
@@ -382,4 +377,7 @@ for (_i = 0, _len = lineA.length; _i < _len; _i++) { | ||
}; | ||
return RobotsTxt; | ||
})(); | ||
})(EventEmitter); | ||
createRobotsTxt = function(url, user_agent) { | ||
@@ -391,3 +389,5 @@ if (user_agent == null) { | ||
}; | ||
module.exports = createRobotsTxt; | ||
}).call(this); |
@@ -19,3 +19,3 @@ { | ||
"main" : "./index.js", | ||
"version" : "0.0.4-5" | ||
"version" : "0.0.4-6" | ||
} |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
32241
403