Comparing version 0.0.1-0 to 0.0.1-4
@@ -37,10 +37,6 @@ (function() {
         return r = false;
       } else if (matchO.priority === prio && (r === true || r === void 0)) {
        return r = void 0;
      }
    } else if (matchO.type === 'allow') {
-     if (matchO.priority > prio) {
+     if (matchO.priority >= prio) {
        return r = true;
      } else if (matchO.priority === prio && (r === false || r === void 0)) {
        return r = void 0;
      }
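The only functional change in this hunk is `>` becoming `>=` in the allow branch: an allow rule whose priority merely equals the current disallow priority previously fell through to the tie branch (leaving the answer `void 0`), whereas it now wins outright. A minimal sketch of that effect, assuming (the diff does not confirm this) that `priority` is the specificity of the matched rule, e.g. the matched pattern length:

```js
// Hedged sketch of the tie-breaking logic changed above; names and the
// meaning of "priority" are assumptions, not taken from the library.
function decide(matches) {
  var r;        // undefined => undecided
  var prio = 0; // priority of the rule that last set r
  matches.forEach(function(m) {
    if (m.type === 'disallow') {
      if (m.priority > prio) { prio = m.priority; r = false; }
      else if (m.priority === prio && (r === true || r === void 0)) { r = void 0; }
    } else if (m.type === 'allow') {
      if (m.priority >= prio) { prio = m.priority; r = true; } // was ">"
      // with ">=" the old equal-priority tie branch below can no longer fire
      else if (m.priority === prio && (r === false || r === void 0)) { r = void 0; }
    }
  });
  return r;
}

// Equal-priority allow vs. disallow: the old ">" left this undecided
// (void 0); the new ">=" resolves the tie in favour of allow.
console.log(decide([
  { type: 'disallow', priority: 6 },
  { type: 'allow',    priority: 6 }
])); // true
```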
@@ -290,3 +286,3 @@ }
   if (user_agent == null) {
-    user_agent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
+    user_agent = 'Mozilla/5.0 (compatible; Open-Source-Coffee-Script-Robots-Txt-Checker/2.1; +http://example.com/bot.html)';
   }
@@ -293,0 +289,0 @@ return new RobotsTxt(url, user_agent);
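With this release the library no longer masquerades as Googlebot when no user agent is given; the README notes added further down still recommend passing your own. A usage sketch based on the `robotsTxt(url, user_agent)` signature visible in this diff (the module name `robotstxt` is assumed from the package being compared):

```js
// Passing an explicit user agent so the default (changed above) never applies.
var robotsTxt = require('robotstxt');

var checker = robotsTxt(
  'http://www.google.com/robots.txt',
  'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)'
);
```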
@@ -19,3 +19,3 @@ {
   "main" : "./index.js",
-  "version" : "0.0.1-0"
+  "version" : "0.0.1-4"
 }
@@ -6,3 +6,3 @@ robots.txt parser for node.js
-- robotstxt is currently alpha
-- robotstxt offers a way to obey the allow/disallow urles listed in the robots.txt
+- robotstxt offers an easy way to obey the allow/disallow rules listed in the sites robots.txt
@@ -30,9 +30,9 @@
 #returns false
-console.log gk.isAllowed 'http://www.google.com/setnewsprefs?sfsdfg'
+console.log gate_keeper.isAllowed 'http://www.google.com/setnewsprefs?sfsdfg'
 #returns false
-console.log gk.isAllowed '/setnewsprefs?sfsdfg'
+console.log gate_keeper.isAllowed '/setnewsprefs?sfsdfg'
 #returns true
-console.log gk.isDisallowed 'http://www.google.com/setnewsprefs?sfsdfg'
+console.log gate_keeper.isDisallowed 'http://www.google.com/setnewsprefs?sfsdfg'
 #returns true
-console.log gk.isDisallowed '/setnewsprefs?sfsdfg'
+console.log gate_keeper.isDisallowed '/setnewsprefs?sfsdfg'
@@ -63,7 +63,7 @@ gate_keeper methods:
 #crawls and parses a robots.txt
-#throws an 'parsed' event
+#throws an 'crawled' event
 blank_robots_txt.crawl: (protocol, host, port, path, user_agent, encoding)
 #parses a txt string line after line
-#throws an 'ready' event
+#throws a 'ready' event
 blank_robots_txt.parse(txt)
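The corrected README lines above document two events: `crawled` after `crawl` fetches and parses a live robots.txt, and `ready` once parsing is done. A wiring sketch based only on the names in this diff; that `ready` passes a gate_keeper is shown elsewhere in the README, everything else (module name, return values) is an assumption:

```js
// Subscribing to the 'ready' event; the factory fetches and parses the
// robots.txt itself, as the test file in this diff does.
var robotsTxt = require('robotstxt');

var r = robotsTxt('http://www.google.com/robots.txt',
                  'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)');

r.on('ready', function(gate_keeper) {
  // gate_keeper answers per-URL allow/disallow questions
  console.log(gate_keeper.isAllowed('/setnewsprefs?sfsdfg'));    // false per the README
  console.log(gate_keeper.isDisallowed('/setnewsprefs?sfsdfg')); // true per the README
});
```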
@@ -79,7 +79,27 @@
 robotsTxt.on 'ready' (gate_keeper)
+**NOTES**
+the default user-agent used is
 #robotsTxt(url, user_agent)
+Mozilla/5.0 (compatible; Open-Source-Coffee-Script-Robots-Txt-Checker/2.1; +http://example.com/bot.html
+i strongly recommend using your own user agent
+i.e.:
+myapp_robots_txt = robotsTxt 'http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)'
+if you want to simulate another crawler (for testing purposes only, of course) see this list for the correct user agent strings
+- [List of User Agent Strings] (http://www.useragentstring.com/pages/useragentstring.php)
+- [Googlebot] (http://www.google.com/support/webmasters/bin/answer.py?answer=1061943)
 ToDo
 ---
-- ready event also pass a sitemaps_in_robots_txt object
+- ready event should also pass a sitemaps_in_robots_txt object
 - sitemaps_in_robots_txt should offer methods to collect the urls listed in the sitemap
@@ -86,0 +106,0 @@
 (function() {
-  var r, robotsTxt;
+  var r, r2, robotsTxt;
   robotsTxt = require('../index.js');
   r = robotsTxt('http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.iamnnotreallyagooglebot.com/)').on('ready', function(gk) {
     console.log(gk.isAllowed('/setnewsprefs?sfsdfg'));
     console.log(gk.isAllowed('/gp/richpub/syltguides/create/hudriwudri'));
     console.log(gk.isAllowed('http://www.google.com/setnewsprefs?sfsdfg'));
     console.log(gk.isAllowed('http://www.google.com/gp/richpub/syltguides/create/hudriwudri'));
     console.log(gk.isDisallowed('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
+    console.log(gk.whatsUp('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
     console.log(gk.isDisallowed('/musics'));
-    return console.log(gk.getGroup());
+    console.log(gk.getGroup());
+    return console.log(gk.why('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
   });
+  r2 = robotsTxt('http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.iamnnotreallyagooglebot.com/)');
 }).call(this);