Comparing version 0.0.1-0 to 0.0.1-4
@@ -37,10 +37,6 @@ (function() {
         return r = false;
       } else if (matchO.priority === prio && (r === true || r === void 0)) {
        return r = void 0;
      }
    } else if (matchO.type === 'allow') {
-     if (matchO.priority > prio) {
+     if (matchO.priority >= prio) {
        return r = true;
      } else if (matchO.priority === prio && (r === false || r === void 0)) {
        return r = void 0;
      }
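The only functional change in this hunk is `>` becoming `>=` in the allow branch: an allow rule whose priority merely equals the current disallow priority previously fell through to the tie branch (leaving the answer `void 0`), whereas it now wins outright. A minimal sketch of that effect, assuming (the diff does not confirm this) that `priority` is the specificity of the matched rule, e.g. the matched pattern length:

```js
// Hedged sketch of the tie-breaking logic changed above; names and the
// meaning of "priority" are assumptions, not taken from the library.
function decide(matches) {
  var r;        // undefined => undecided
  var prio = 0; // priority of the rule that last set r
  matches.forEach(function(m) {
    if (m.type === 'disallow') {
      if (m.priority > prio) { prio = m.priority; r = false; }
      else if (m.priority === prio && (r === true || r === void 0)) { r = void 0; }
    } else if (m.type === 'allow') {
      if (m.priority >= prio) { prio = m.priority; r = true; } // was ">"
      // with ">=" the old equal-priority tie branch below can no longer fire
      else if (m.priority === prio && (r === false || r === void 0)) { r = void 0; }
    }
  });
  return r;
}

// Equal-priority allow vs. disallow: the old ">" left this undecided
// (void 0); the new ">=" resolves the tie in favour of allow.
console.log(decide([
  { type: 'disallow', priority: 6 },
  { type: 'allow',    priority: 6 }
])); // true
```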
@@ -290,3 +286,3 @@ }
   if (user_agent == null) {
-    user_agent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
+    user_agent = 'Mozilla/5.0 (compatible; Open-Source-Coffee-Script-Robots-Txt-Checker/2.1; +http://example.com/bot.html)';
   }
@@ -293,0 +289,0 @@ return new RobotsTxt(url, user_agent);
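With this release the library no longer masquerades as Googlebot when no user agent is given; the README notes added further down still recommend passing your own. A usage sketch based on the `robotsTxt(url, user_agent)` signature visible in this diff (the module name `robotstxt` is assumed from the package being compared):

```js
// Passing an explicit user agent so the default (changed above) never applies.
var robotsTxt = require('robotstxt');

var checker = robotsTxt(
  'http://www.google.com/robots.txt',
  'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)'
);
```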
@@ -19,3 +19,3 @@ {
   "main" : "./index.js",
-  "version" : "0.0.1-0"
+  "version" : "0.0.1-4"
 }
@@ -6,3 +6,3 @@ robots.txt parser for node.js
-- robotstxt is currently alpha
-- robotstxt offers a way to obey the allow/disallow urles listed in the robots.txt
+- robotstxt offers an easy way to obey the allow/disallow rules listed in the sites robots.txt
@@ -30,9 +30,9 @@
 #returns false
-console.log gk.isAllowed 'http://www.google.com/setnewsprefs?sfsdfg'
+console.log gate_keeper.isAllowed 'http://www.google.com/setnewsprefs?sfsdfg'
 #returns false
-console.log gk.isAllowed '/setnewsprefs?sfsdfg'
+console.log gate_keeper.isAllowed '/setnewsprefs?sfsdfg'
 #returns true
-console.log gk.isDisallowed 'http://www.google.com/setnewsprefs?sfsdfg'
+console.log gate_keeper.isDisallowed 'http://www.google.com/setnewsprefs?sfsdfg'
 #returns true
-console.log gk.isDisallowed '/setnewsprefs?sfsdfg'
+console.log gate_keeper.isDisallowed '/setnewsprefs?sfsdfg'
@@ -63,7 +63,7 @@ gate_keeper methods:
 #crawls and parses a robots.txt
-#throws an 'parsed' event
+#throws an 'crawled' event
 blank_robots_txt.crawl: (protocol, host, port, path, user_agent, encoding)
 #parses a txt string line after line
-#throws an 'ready' event
+#throws a 'ready' event
 blank_robots_txt.parse(txt)
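The corrected README lines above document two events: `crawled` after `crawl` fetches and parses a live robots.txt, and `ready` once parsing is done. A wiring sketch based only on the names in this diff; that `ready` passes a gate_keeper is shown elsewhere in the README, everything else (module name, return values) is an assumption:

```js
// Subscribing to the 'ready' event; the factory fetches and parses the
// robots.txt itself, as the test file in this diff does.
var robotsTxt = require('robotstxt');

var r = robotsTxt('http://www.google.com/robots.txt',
                  'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)');

r.on('ready', function(gate_keeper) {
  // gate_keeper answers per-URL allow/disallow questions
  console.log(gate_keeper.isAllowed('/setnewsprefs?sfsdfg'));    // false per the README
  console.log(gate_keeper.isDisallowed('/setnewsprefs?sfsdfg')); // true per the README
});
```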
@@ -79,7 +79,27 @@
 robotsTxt.on 'ready' (gate_keeper)
+**NOTES**
+the default user-agent used is
 #robotsTxt(url, user_agent)
+Mozilla/5.0 (compatible; Open-Source-Coffee-Script-Robots-Txt-Checker/2.1; +http://example.com/bot.html
+i strongly recommend using your own user agent
+i.e.:
+myapp_robots_txt = robotsTxt 'http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)'
+if you want to simulate another crawler (for testing purposes only, of course) see this list for the correct user agent strings
+- [List of User Agent Strings] (http://www.useragentstring.com/pages/useragentstring.php)
+- [Googlebot] (http://www.google.com/support/webmasters/bin/answer.py?answer=1061943)
 ToDo
 ---
-- ready event also pass a sitemaps_in_robots_txt object
+- ready event should also pass a sitemaps_in_robots_txt object
 - sitemaps_in_robots_txt should offer methods to collect the urls listed in the sitemap
@@ -86,0 +106,0 @@
 (function() {
-  var r, robotsTxt;
+  var r, r2, robotsTxt;
   robotsTxt = require('../index.js');
   r = robotsTxt('http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.iamnnotreallyagooglebot.com/)').on('ready', function(gk) {
     console.log(gk.isAllowed('/setnewsprefs?sfsdfg'));
     console.log(gk.isAllowed('/gp/richpub/syltguides/create/hudriwudri'));
     console.log(gk.isAllowed('http://www.google.com/setnewsprefs?sfsdfg'));
     console.log(gk.isAllowed('http://www.google.com/gp/richpub/syltguides/create/hudriwudri'));
     console.log(gk.isDisallowed('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
+    console.log(gk.whatsUp('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
     console.log(gk.isDisallowed('/musics'));
-    return console.log(gk.getGroup());
+    console.log(gk.getGroup());
+    return console.log(gk.why('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
   });
+  r2 = robotsTxt('http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.iamnnotreallyagooglebot.com/)');
 }).call(this);