robots-parser - npm Package Compare versions

Comparing version 2.2.0 to 2.3.0


package.json
 {
 	"name": "robots-parser",
-	"version": "2.2.0",
+	"version": "2.3.0",
 	"description": "Robots.txt parser.",
 	"main": "index.js",
 	"directories": {
 		"test": "tests"
 	},
 	"scripts": {
-		"test": "nyc --clean --reporter html --report-dir ./report/coverage ./node_modules/.bin/mocha"
+		"test": "nyc --reporter=text-summary --reporter=html --reporter=lcovonly mocha"
 	},
 	"repository": {
 		"type": "git",
 		"url": "https://github.com/samclarke/robots-parser.git"
 	},
 	"author": "Sam Clarke <sam@samclarke.com>",
 	"license": "MIT",
+	"files": [
+		"/Robots.js",
+		"/index.js"
+	],
+	"prettier": {
+		"tabWidth": 4,
+		"useTabs": true,
+		"singleQuote": true,
+		"trailingComma": "none"
+	},
 	"devDependencies": {
 		"chai": "^4.2.0",
 		"mocha": "^6.1.4",
 		"nyc": "^14.1.1"
 	}
 }

README.md

@@ -1,2 +1,2 @@
-# Robots Parser [![DeepScan Grade](https://deepscan.io/api/projects/1275/branches/3378/badge/grade.svg)](https://deepscan.io/dashboard/#view=project&pid=1275&bid=3378) [![GitHub license](https://img.shields.io/github/license/samclarke/robots-parser.svg)](https://github.com/samclarke/robots-parser/blob/master/license.md)
+# Robots Parser [![DeepScan grade](https://deepscan.io/api/teams/457/projects/16277/branches/344939/badge/grade.svg)](https://deepscan.io/dashboard#view=project&tid=457&pid=16277&bid=344939) [![GitHub license](https://img.shields.io/github/license/samclarke/robots-parser.svg)](https://github.com/samclarke/robots-parser/blob/master/license.md) [![Coverage Status](https://coveralls.io/repos/github/samclarke/robots-parser/badge.svg?branch=master)](https://coveralls.io/github/samclarke/robots-parser?branch=master)

@@ -98,5 +98,28 @@ NodeJS robots.txt parser.

### Version 2.3.0:
* Fixed a bug where passing a user agent named "constructor" to `isAllowed()` / `isDisallowed()` would throw an error (a sketch of the fixed behavior follows the example below).
* Added support for relative URLs. This does not affect the default behavior, so it is safe to upgrade.
Relative matching only works if both the robots.txt URL and the URLs being checked are relative.
For example:
```js
var robots = robotsParser('/robots.txt', [
	'User-agent: *',
	'Disallow: /dir/',
	'Disallow: /test.html',
	'Allow: /dir/test.html',
	'Allow: /test.html'
].join('\n'));

robots.isAllowed('/test.html', 'Sams-Bot/1.0'); // true ('Allow' wins the equal-length tie)
robots.isAllowed('/dir/test.html', 'Sams-Bot/1.0'); // true
robots.isDisallowed('/dir/test2.html', 'Sams-Bot/1.0'); // true
```
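
Not part of the diff: the "constructor" fix in the first bullet above is easiest to see end to end. A minimal sketch, using a hypothetical robots.txt:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://example.com/robots.txt', [
	'User-agent: *',
	'Disallow: /private/'
].join('\n'));

// Before 2.3.0 the internal rules lookup on a plain {} object resolved
// "constructor" to Object.prototype.constructor and then threw a TypeError;
// in 2.3.0 it returns a boolean like any other user agent.
robots.isAllowed('http://example.com/', 'constructor'); // true
```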
### Version 2.2.0:
-* Fix bug that with matching wildcard patterns with some URLs
+* Fixed bug with matching wildcard patterns in some URLs
 – Thanks to @ckylape for reporting and fixing

@@ -103,0 +126,0 @@ * Changed matching algorithm to match Google's implementation in google/robotstxt

Robots.js

@@ -89,3 +89,3 @@ var URL = require('url').URL;

 		return urlEncodeToUpper(encodeURI(path).replace(/%25/g, '%'));
-	} catch(e) {
+	} catch (e) {
 		return path;
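
Aside (not from the diff): the unchanged first line of this hunk is the interesting one. Roughly what the normalization does, assuming standard `encodeURI` behavior:

```js
// encodeURI percent-encodes non-ASCII characters but also re-encodes any
// existing '%' as '%25'; the replace undoes that so pre-encoded sequences
// such as '%2F' are not double-encoded before urlEncodeToUpper() runs.
encodeURI('/caf\u00e9/%2Fa').replace(/%25/g, '%');
// => '/caf%C3%A9/%2Fa'
```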

@@ -159,3 +159,6 @@ }

 	for (var i = 0; i < numMatchingLengths; i++) {
-		if (matchingLengths[i] < path.length && path[matchingLengths[i]] === pattern[p]) {
+		if (
+			matchingLengths[i] < path.length &&
+			path[matchingLengths[i]] === pattern[p]
+		) {
 			matchingLengths[numMatches++] = matchingLengths[i] + 1;
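
For context, this loop belongs to the wildcard matcher. A sketch of the `*` and `$` semantics it implements, with a hypothetical robots.txt:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://example.com/robots.txt', [
	'User-agent: *',
	'Disallow: /*.pdf$'
].join('\n'));

// '*' matches any run of characters; '$' anchors the pattern to the end.
robots.isDisallowed('http://example.com/docs/file.pdf', 'Sams-Bot/1.0'); // true
robots.isAllowed('http://example.com/file.pdf?page=1', 'Sams-Bot/1.0'); // true ('$' stops the match)
```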

@@ -187,3 +190,3 @@ }

 	var isNoneUserAgentState = true;
-	for (var i=0; i < lines.length; i++) {
+	for (var i = 0; i < lines.length; i++) {
 		var line = lines[i];

@@ -239,6 +242,6 @@

 function findRule(path, rules) {
-	var matchingRule = null;
+	var matchedRule = null;

-	for (var i=0; i < rules.length; i++) {
-		var rule = rules[i];
+	for (var i = 0; i < rules.length; i++) {
+		var rule = rules[i];

@@ -250,11 +253,15 @@ if (!matches(rule.pattern, path)) {

 		// The longest matching rule takes precedence
-		if (!matchingRule || rule.pattern.length > matchingRule.pattern.length) {
-			matchingRule = rule;
-		} else if (rule.pattern.length == matchingRule.pattern.length &&
-				rule.allow && !matchingRule.allow) {
-			matchingRule = rule;
-		}
+		// If rules are the same length then allow takes precedence
+		if (!matchedRule || rule.pattern.length > matchedRule.pattern.length) {
+			matchedRule = rule;
+		} else if (
+			rule.pattern.length == matchedRule.pattern.length &&
+			rule.allow &&
+			!matchedRule.allow
+		) {
+			matchedRule = rule;
+		}
 	}

-	return matchingRule;
+	return matchedRule;
 }
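
The two comments describe the whole precedence scheme. A minimal sketch of both cases, with a hypothetical robots.txt:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://example.com/robots.txt', [
	'User-agent: *',
	'Disallow: /p',
	'Allow: /page',
	'Disallow: /dir',
	'Allow: /dir'
].join('\n'));

// Longest match wins: 'Allow: /page' (5 chars) beats 'Disallow: /p' (2 chars).
robots.isAllowed('http://example.com/page', 'Sams-Bot/1.0'); // true
// Equal length: the Allow rule beats the Disallow rule.
robots.isAllowed('http://example.com/dir', 'Sams-Bot/1.0'); // true
```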

@@ -273,4 +280,8 @@

 	try {
-		return new URL(url);
-	} catch(e) {
+		// Specify a URL to be used with relative paths
+		// Using non-existent subdomain so can never cause conflict unless
+		// trying to crawl it but doesn't exist and even if tried worst that can
+		// happen is it allows relative URLs on it.
+		return new URL(url, 'http://robots-relative.samclarke.com/');
+	} catch (e) {
 		return null;
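
The second argument to `new URL()` is a base that only applies to relative input, which is why the placeholder host is harmless. Illustration (standalone, not from the diff):

```js
var URL = require('url').URL;

// Absolute URLs ignore the base entirely.
new URL('http://example.com/robots.txt', 'http://robots-relative.samclarke.com/').href;
// => 'http://example.com/robots.txt'

// Relative paths resolve against the placeholder host, so a relative
// robots.txt URL and relative checked URLs end up on the same origin.
new URL('/robots.txt', 'http://robots-relative.samclarke.com/').href;
// => 'http://robots-relative.samclarke.com/robots.txt'
```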

@@ -280,3 +291,2 @@ }

function Robots(url, contents) {

@@ -286,3 +296,3 @@ this._url = parseUrl(url) || {};

-	this._rules = {};
+	this._rules = Object.create(null);
 	this._sitemaps = [];
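
This one-line change is what fixes the "constructor" bug from the changelog. The mechanism, roughly:

```js
// A plain object literal inherits from Object.prototype, so looking up a
// user agent literally named "constructor" returns the inherited Object
// function: truthy, but not an array of rules, so later code throws.
var plain = {};
plain['constructor']; // [Function: Object]

// A null-prototype object has no inherited keys, so the lookup yields
// undefined and safely falls through to this._rules['*'].
var bare = Object.create(null);
bare['constructor']; // undefined
```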

@@ -364,8 +374,10 @@ this._preferredHost = null;

-	parsedUrl.port = parsedUrl.port || '80';
+	parsedUrl.port = parsedUrl.port || 80;

 	// The base URL must match otherwise this robots.txt is not valid for it.
-	if (parsedUrl.protocol !== this._url.protocol ||
-		parsedUrl.hostname !== this._url.hostname ||
-		parsedUrl.port !== this._url.port) {
+	if (
+		parsedUrl.protocol !== this._url.protocol ||
+		parsedUrl.hostname !== this._url.hostname ||
+		parsedUrl.port !== this._url.port
+	) {
 		return;
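
A consequence of this check: `isAllowed()` / `isDisallowed()` return undefined, not false, for URLs this robots.txt does not apply to. A sketch with hypothetical URLs:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser(
	'http://example.com/robots.txt',
	'User-agent: *\nDisallow: /'
);

robots.isAllowed('http://example.com/page', 'Sams-Bot/1.0'); // false (same origin, disallowed)
robots.isAllowed('https://example.com/page', 'Sams-Bot/1.0'); // undefined (protocol differs)
robots.isAllowed('http://example.com:8080/page', 'Sams-Bot/1.0'); // undefined (port differs)
```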

@@ -375,3 +387,3 @@ }

 	var rules = this._rules[userAgent] || this._rules['*'] || [];
-	var path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search)
+	var path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search);
 	var rule = findRule(path, rules);

