robotstxt - npm Package Compare versions

Comparing version 0.0.1-0 to 0.0.1-4


index.js

@@ -37,10 +37,6 @@ (function() {

return r = false;
} else if (matchO.priority === prio && (r === true || r === void 0)) {
return r = void 0;
}
} else if (matchO.type === 'allow') {
- if (matchO.priority > prio) {
+ if (matchO.priority >= prio) {
return r = true;
} else if (matchO.priority === prio && (r === false || r === void 0)) {
return r = void 0;
}
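The switch from > to >= in the allow branch changes tie-breaking: an allow rule that matches with the same priority as a disallow rule can now win, rather than only strictly higher-priority allows. Below is a minimal JavaScript sketch of that resolution behavior; it is not the package's own code, and the flat rules array and naive prefix matching are illustrative assumptions only.

// Sketch only: resolve a path against robots.txt-style rules where an
// 'allow' rule wins a priority tie with a 'disallow' rule.
// Assumed rule shape: { type: 'allow' | 'disallow', path: '/prefix', priority: path.length }
function isAllowed(path, rules) {
  var verdict = true;   // no matching rule means the path is allowed
  var best = -1;
  rules.forEach(function (rule) {
    if (path.indexOf(rule.path) !== 0) return;   // naive prefix match for illustration
    var wins = rule.priority > best ||
               (rule.priority === best && rule.type === 'allow');   // the >= style tie-break
    if (wins) {
      best = rule.priority;
      verdict = (rule.type === 'allow');
    }
  });
  return verdict;
}

// Equal-length allow and disallow patterns: the allow rule takes the tie.
console.log(isAllowed('/setnewsprefs', [
  { type: 'disallow', path: '/setnews', priority: 8 },
  { type: 'allow',    path: '/setnews', priority: 8 }
]));  // true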

@@ -290,3 +286,3 @@ }

if (user_agent == null) {
- user_agent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
+ user_agent = 'Mozilla/5.0 (compatible; Open-Source-Coffee-Script-Robots-Txt-Checker/2.1; +http://example.com/bot.html)';
}

@@ -293,0 +289,0 @@ return new RobotsTxt(url, user_agent);

@@ -19,3 +19,3 @@ {

"main" : "./index.js",
"version" : "0.0.1-0"
"version" : "0.0.1-4"
}

@@ -6,3 +6,3 @@ robots.txt parser for node.js

- robotstxt is currently alpha
- - robotstxt offers a way to obey the allow/disallow urles listed in the robots.txt
+ - robotstxt offers an easy way to obey the allow/disallow rules listed in the sites robots.txt

@@ -30,9 +30,9 @@

#returns false
- console.log gk.isAllowed 'http://www.google.com/setnewsprefs?sfsdfg'
+ console.log gate_keeper.isAllowed 'http://www.google.com/setnewsprefs?sfsdfg'
#returns false
- console.log gk.isAllowed '/setnewsprefs?sfsdfg'
+ console.log gate_keeper.isAllowed '/setnewsprefs?sfsdfg'
#returns true
- console.log gk.isDisallowed 'http://www.google.com/setnewsprefs?sfsdfg'
+ console.log gate_keeper.isDisallowed 'http://www.google.com/setnewsprefs?sfsdfg'
#returns true
- console.log gk.isDisallowed '/setnewsprefs?sfsdfg'
+ console.log gate_keeper.isDisallowed '/setnewsprefs?sfsdfg'

@@ -63,7 +63,7 @@ gate_keeper methods:

#crawls and parses a robots.txt
- #throws an 'parsed' event
+ #throws an 'crawled' event
blank_robots_txt.crawl: (protocol, host, port, path, user_agent, encoding)
#parses a txt string line after line
- #throws an 'ready' event
+ #throws a 'ready' event
blank_robots_txt.parse(txt)

@@ -79,7 +79,27 @@

robotsTxt.on 'ready' (gate_keeper)
+ **NOTES**
+ the default user-agent used is
#robotsTxt(url, user_agent)
+ Mozilla/5.0 (compatible; Open-Source-Coffee-Script-Robots-Txt-Checker/2.1; +http://example.com/bot.html
+ i strongly recommend using your own user agent
+ i.e.:
+ myapp_robots_txt = robotsTxt 'http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)'
+ if you want to simulate another crawler (for testing purposes only, of course) see this list for the correct user agent strings
+ - [List of User Agent Strings] (http://www.useragentstring.com/pages/useragentstring.php)
+ - [Googlebot] (http://www.google.com/support/webmasters/bin/answer.py?answer=1061943)
ToDo
---
- - ready event also pass a sitemaps_in_robots_txt object
+ - ready event should also pass a sitemaps_in_robots_txt object
- sitemaps_in_robots_txt should offer methods to collect the urls listed in the sitemap
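Putting the README's user-agent advice together with the API shown in the bundled test, a typical call supplies the robots.txt URL plus your own user-agent string and waits for the 'ready' event before querying the gate_keeper. A short JavaScript sketch follows; the require name assumes the package is installed as "robotstxt", and the bot name and homepage URL are placeholders.

// Build a gate_keeper from a live robots.txt, using your own user agent
// as the README recommends, then check URLs once parsing has finished.
var robotsTxt = require('robotstxt');

robotsTxt('http://www.google.com/robots.txt',
          'Mozilla/5.0 (compatible; MyAppBot/2.1; +http://www.example.com/)')
  .on('ready', function (gate_keeper) {
    console.log(gate_keeper.isAllowed('/setnewsprefs?sfsdfg'));     // false per the README example
    console.log(gate_keeper.isDisallowed('/setnewsprefs?sfsdfg'));  // true per the README example
  });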

@@ -86,0 +106,0 @@

(function() {
- var r, robotsTxt;
+ var r, r2, robotsTxt;
robotsTxt = require('../index.js');
r = robotsTxt('http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.iamnnotreallyagooglebot.com/)').on('ready', function(gk) {
console.log(gk.isAllowed('/setnewsprefs?sfsdfg'));
console.log(gk.isAllowed('/gp/richpub/syltguides/create/hudriwudri'));
console.log(gk.isAllowed('http://www.google.com/setnewsprefs?sfsdfg'));
console.log(gk.isAllowed('http://www.google.com/gp/richpub/syltguides/create/hudriwudri'));
console.log(gk.isDisallowed('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
console.log(gk.whatsUp('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
console.log(gk.isDisallowed('/musics'));
- return console.log(gk.getGroup());
+ console.log(gk.getGroup());
+ return console.log(gk.why('/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=4/'));
});
+ r2 = robotsTxt('http://www.google.com/robots.txt', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.iamnnotreallyagooglebot.com/)');
}).call(this);

