robots-parser
Comparing version 2.2.0 to 2.3.0
package.json in 2.2.0:

{
  "name": "robots-parser",
  "version": "2.2.0",
  "description": "Robots.txt parser.",
  "main": "index.js",
  "directories": {
    "test": "tests"
  },
  "scripts": {
    "test": "nyc --clean --reporter html --report-dir ./report/coverage ./node_modules/.bin/mocha"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/samclarke/robots-parser.git"
  },
  "author": "Sam Clarke <sam@samclarke.com>",
  "license": "MIT",
  "devDependencies": {
    "chai": "^4.2.0",
    "mocha": "^6.1.4",
    "nyc": "^14.1.1"
  }
}

package.json in 2.3.0:

{
  "name": "robots-parser",
  "version": "2.3.0",
  "description": "Robots.txt parser.",
  "main": "index.js",
  "directories": {
    "test": "tests"
  },
  "scripts": {
    "test": "nyc --reporter=text-summary --reporter=html --reporter=lcovonly mocha"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/samclarke/robots-parser.git"
  },
  "author": "Sam Clarke <sam@samclarke.com>",
  "license": "MIT",
  "files": [
    "/Robots.js",
    "/index.js"
  ],
  "prettier": {
    "tabWidth": 4,
    "useTabs": true,
    "singleQuote": true,
    "trailingComma": "none"
  },
  "devDependencies": {
    "chai": "^4.2.0",
    "mocha": "^6.1.4",
    "nyc": "^14.1.1"
  }
}
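Nothing about how the package is consumed changes between these two manifests: the entry point is still `"main": "index.js"`; 2.3.0 adds a `files` whitelist and a Prettier config and switches the nyc reporters. As a reminder of the consumer-facing API, a minimal sketch (the robots.txt body and bot name are invented):

```js
// Minimal usage sketch; the robots.txt contents are invented for illustration.
var robotsParser = require('robots-parser');

var robots = robotsParser('http://www.example.com/robots.txt', [
    'User-agent: *',
    'Disallow: /private/'
].join('\n'));

robots.isAllowed('http://www.example.com/index.html', 'Sams-Bot/1.0'); // true
robots.isAllowed('http://www.example.com/private/page.html', 'Sams-Bot/1.0'); // false
```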
README.md:

@@ -1,2 +0,2 @@
-# Robots Parser [](https://deepscan.io/dashboard/#view=project&pid=1275&bid=3378) [](https://github.com/samclarke/robots-parser/blob/master/license.md)
+# Robots Parser [](https://deepscan.io/dashboard#view=project&tid=457&pid=16277&bid=344939) [](https://github.com/samclarke/robots-parser/blob/master/license.md) [](https://coveralls.io/github/samclarke/robots-parser?branch=master)
@@ -98,5 +98,28 @@ NodeJS robots.txt parser.

### Version 2.3.0:
 * Fixed bug where if the user-agent passed to `isAllowed()` / `isDisallowed()` is called "constructor" it would throw an error.
 * Added support for relative URLs. This does not affect the default behavior so can safely be upgraded.
   Relative matching is only allowed if both the robots.txt URL and the URLs being checked are relative.

   For example:

   ```js
   var robots = robotsParser('/robots.txt', [
       'User-agent: *',
       'Disallow: /dir/',
       'Disallow: /test.html',
       'Allow: /dir/test.html',
       'Allow: /test.html'
   ].join('\n'));

   robots.isAllowed('/test.html', 'Sams-Bot/1.0'); // false
   robots.isAllowed('/dir/test.html', 'Sams-Bot/1.0'); // true
   robots.isDisallowed('/dir/test2.html', 'Sams-Bot/1.0'); // true
   ```

### Version 2.2.0:
-* Fix bug that with matching wildcard patterns with some URLs
+* Fixed bug that with matching wildcard patterns with some URLs
   – Thanks to @ckylape for reporting and fixing

@@ -103,0 +126,0 @@ * Changed matching algorithm to match Google's implementation in google/robotstxt
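The first changelog entry above (the "constructor" user-agent fix) has no example of its own, so here is a small illustration; the robots.txt body is invented, and the point is only that the literal user-agent string "constructor" no longer throws:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://www.example.com/robots.txt', [
    'User-agent: *',
    'Disallow: /admin/'
].join('\n'));

// In 2.2.0 this threw, because "constructor" collided with a property the
// internal rules object inherited; in 2.3.0 it is treated like any other
// unknown user-agent and falls back to the * group.
robots.isAllowed('http://www.example.com/admin/', 'constructor'); // false
```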
Parser source:

@@ -89,3 +89,3 @@ var URL = require('url').URL;
         return urlEncodeToUpper(encodeURI(path).replace(/%25/g, '%'));
-    } catch(e) {
+    } catch (e) {
         return path;
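Only the `catch` spacing changes in this hunk, but the visible line shows the helper's core step: percent-encode the path with `encodeURI` and collapse the `%25` sequences it produces for characters that were already encoded, falling back to the raw path if encoding throws. A standalone gloss of that step (the `encodePath` name is made up, and this is not the library's full `urlEncodeToUpper` helper, which, judging by its name, also uppercases the percent-encoding):

```js
// Illustration only: encode, then undo encodeURI's double-encoding of "%".
function encodePath(path) {
    return encodeURI(path).replace(/%25/g, '%');
}

encodePath('/path with spaces/'); // "/path%20with%20spaces/"
encodePath('/already%20encoded'); // "/already%20encoded" (not "%2520...")
```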
@@ -159,3 +159,6 @@ }
     for (var i = 0; i < numMatchingLengths; i++) {
-        if (matchingLengths[i] < path.length && path[matchingLengths[i]] === pattern[p]) {
+        if (
+            matchingLengths[i] < path.length &&
+            path[matchingLengths[i]] === pattern[p]
+        ) {
             matchingLengths[numMatches++] = matchingLengths[i] + 1;
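This hunk only reflows the condition inside the wildcard matcher. For reference, the behaviour that matcher implements is easiest to see through the public API, where `*` matches any run of characters and `$` anchors the end of the URL (robots.txt body invented):

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://www.example.com/robots.txt', [
    'User-agent: *',
    'Disallow: /*.pdf$'
].join('\n'));

robots.isAllowed('http://www.example.com/report.pdf', 'Sams-Bot/1.0'); // false
robots.isAllowed('http://www.example.com/report.pdf?page=2', 'Sams-Bot/1.0'); // true ($ requires the URL to end in .pdf)
```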
@@ -187,3 +190,3 @@ }
     var isNoneUserAgentState = true;
-    for (var i=0; i < lines.length; i++) {
+    for (var i = 0; i < lines.length; i++) {
         var line = lines[i];
@@ -239,6 +242,6 @@
 function findRule(path, rules) {
-    var matchingRule = null;
+    var matchedRule = null;
-    for (var i=0; i < rules.length; i++) {
+    for (var i = 0; i < rules.length; i++) {
         var rule = rules[i];
@@ -250,11 +253,15 @@ if (!matches(rule.pattern, path)) {
         // The longest matching rule takes precedence
-        if (!matchingRule || rule.pattern.length > matchingRule.pattern.length) {
-            matchingRule = rule;
-        } else if (rule.pattern.length == matchingRule.pattern.length &&
-            rule.allow && !matchingRule.allow) {
-            matchingRule = rule;
+        // If rules are the same length then allow takes precedence
+        if (!matchedRule || rule.pattern.length > matchedRule.pattern.length) {
+            matchedRule = rule;
+        } else if (
+            rule.pattern.length == matchedRule.pattern.length &&
+            rule.allow &&
+            !matchedRule.allow
+        ) {
+            matchedRule = rule;
         }
     }

-    return matchingRule;
+    return matchedRule;
 }
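The comments in this hunk state the precedence that the `matchingRule` to `matchedRule` rename preserves: the longest matching pattern wins, and when an Allow and a Disallow pattern are the same length, Allow wins. Checked through the public API with an invented robots.txt:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://www.example.com/robots.txt', [
    'User-agent: *',
    'Disallow: /page',
    'Allow: /page'
].join('\n'));

// Equal-length patterns, so the Allow rule takes precedence.
robots.isAllowed('http://www.example.com/page', 'Sams-Bot/1.0'); // true
```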
@@ -273,4 +280,8 @@
     try {
-        return new URL(url);
-    } catch(e) {
+        // Specify a URL to be used with relative paths
+        // Using non-existent subdomain so can never cause conflict unless
+        // trying to crawl it but doesn't exist and even if tried worst that can
+        // happen is it allows relative URLs on it.
+        return new URL(url, 'http://robots-relative.samclarke.com/');
+    } catch (e) {
         return null;
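The added base argument relies on standard WHATWG `URL` resolution: an absolute input ignores the base, while a relative input resolves against it, which is what makes relative robots.txt URLs parseable at all. Roughly:

```js
// Absolute URLs are unaffected by the placeholder base.
new URL('http://www.example.com/robots.txt', 'http://robots-relative.samclarke.com/').href;
// => "http://www.example.com/robots.txt"

// Relative URLs resolve against it, so they can be parsed and compared.
new URL('/dir/test.html', 'http://robots-relative.samclarke.com/').href;
// => "http://robots-relative.samclarke.com/dir/test.html"
```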
@@ -280,3 +291,2 @@ }
 function Robots(url, contents) {
@@ -286,3 +296,3 @@ this._url = parseUrl(url) || {};
-    this._rules = {};
+    this._rules = Object.create(null);
     this._sitemaps = [];
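Swapping the plain `{}` rules map for `Object.create(null)` is what fixes the "constructor" user-agent bug listed in the changelog: a plain object inherits properties such as `constructor` from `Object.prototype`, so looking up a user-agent by that name returned a function instead of a rule list. A quick demonstration of the difference:

```js
var plain = {};
var bare = Object.create(null);

typeof plain['constructor']; // "function" (inherited from Object.prototype)
typeof bare['constructor'];  // "undefined" (no prototype chain to inherit from)
```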
@@ -364,8 +374,10 @@ this._preferredHost = null;
-    parsedUrl.port = parsedUrl.port || '80';
+    parsedUrl.port = parsedUrl.port || 80;

     // The base URL must match otherwise this robots.txt is not valid for it.
-    if (parsedUrl.protocol !== this._url.protocol ||
+    if (
+        parsedUrl.protocol !== this._url.protocol ||
         parsedUrl.hostname !== this._url.hostname ||
-        parsedUrl.port !== this._url.port) {
+        parsedUrl.port !== this._url.port
+    ) {
         return;
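This guard is why a robots.txt only answers for URLs on the same protocol, hostname, and port: on a mismatch the method returns early with no verdict, so `isAllowed()` / `isDisallowed()` come back `undefined`. A sketch with invented hosts:

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://www.example.com/robots.txt', [
    'User-agent: *',
    'Disallow: /private/'
].join('\n'));

// Different host, so this robots.txt has nothing to say about the URL.
robots.isAllowed('http://other.example.org/private/', 'Sams-Bot/1.0'); // undefined
```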
@@ -375,3 +387,3 @@ }
     var rules = this._rules[userAgent] || this._rules['*'] || [];
-    var path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search)
+    var path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search);
     var rule = findRule(path, rules);
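Finally, note that the path handed to `findRule()` is `pathname + search`, so query strings take part in rule matching (robots.txt invented):

```js
var robotsParser = require('robots-parser');

var robots = robotsParser('http://www.example.com/robots.txt', [
    'User-agent: *',
    'Disallow: /search?q='
].join('\n'));

robots.isAllowed('http://www.example.com/search?q=test', 'Sams-Bot/1.0'); // false
robots.isAllowed('http://www.example.com/search', 'Sams-Bot/1.0'); // true
```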