robots-txt-guard
Comparing version 0.2.1 to 1.0.0
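At a glance: this major release adds JSDoc type annotations (now checked with tsc), switches the codebase to semistandard style (const over var, a space before function parentheses), hardens wildcard pattern compilation against ReDoS, and refreshes the dev dependencies.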
lib/guard.js
 'use strict';
-var patterns = require('./patterns');
+const patterns = require('./patterns');
+/**
+ * @typedef {import('./patterns').Pattern} Pattern
+ * @typedef {{
+ *   pattern: Pattern
+ *   allow: boolean
+ * }} Rule
+ * @typedef {{
+ *   pattern: Pattern
+ *   accessibilityRules: Rule[]
+ *   indexabilityRules: Rule[]
+ * }} RuleGroup
+ * @typedef {{
+ *   rule: string
+ *   path: string
+ * }} ConfigGroupRule
+ * @typedef {{
+ *   agents: string[]
+ *   rules: ConfigGroupRule[]
+ * }} ConfigGroup
+ * @typedef {{
+ *   groups: ConfigGroup[]
+ * }} Config
+ * @typedef {{
+ *   isAllowed: (userAgent: string, path: string) => boolean
+ *   isDisallowAll: (userAgent: string) => boolean
+ *   isIndexable: (userAgent: string, path: string) => boolean
+ * }} Guard
+ */
-function moreSpecificFirst(obj1, obj2) {
+/**
+ * @param {{ pattern: { specificity: number} }} obj1
+ * @param {{ pattern: { specificity: number} }} obj2
+ * @returns {number}
+ */
+function moreSpecificFirst (obj1, obj2) {
   return obj2.pattern.specificity - obj1.pattern.specificity;
 }
-module.exports = function makeGuard(config) {
-  var groups = [];
+/**
+ * @param {Config} config
+ * @returns {Guard}
+ */
+module.exports = function makeGuard (config) {
+  /**
+   * @type {RuleGroup[]}
+   */
+  const groups = [];
@@ -31,3 +71,3 @@ // flatten agents
       return group;
-    }, [])
+    }, /** @type {{ rule: string, path: string }[]} */([]))
       .map(({ rule, path }) => ({
@@ -59,3 +99,7 @@ pattern: patterns.path(path),
-  function findGroup(userAgent) {
+  /**
+   * @param {string} userAgent
+   * @returns {RuleGroup | null}
+   */
+  function findGroup (userAgent) {
     for (const group of groups) {
@@ -69,3 +113,8 @@ if (group.pattern.test(userAgent)) {
-  function matchRule(rules, path) {
+  /**
+   * @param {Rule[]} rules
+   * @param {string} path
+   * @returns {boolean}
+   */
+  function matchRule (rules, path) {
     for (const rule of rules) {
@@ -80,4 +129,10 @@ if (rule.pattern.test(path)) {
-  function isRuleSetAllowed(ruleSet, userAgent, path) {
-    var group = findGroup(userAgent);
+  /**
+   * @param {'accessibilityRules' | 'indexabilityRules'} ruleSet
+   * @param {string} userAgent
+   * @param {string} path
+   * @returns {boolean}
+   */
+  function isRuleSetAllowed (ruleSet, userAgent, path) {
+    const group = findGroup(userAgent);
     if (group) {
@@ -90,14 +145,28 @@ return matchRule(group[ruleSet], path);
-  function isAllowed(userAgent, path) {
+  /**
+   * @param {string} userAgent
+   * @param {string} path
+   * @returns {boolean}
+   */
+  function isAllowed (userAgent, path) {
     return isRuleSetAllowed('accessibilityRules', userAgent, path);
   }
-  function isIndexable(userAgent, path) {
+  /**
+   * @param {string} userAgent
+   * @param {string} path
+   * @returns {boolean}
+   */
+  function isIndexable (userAgent, path) {
     return isRuleSetAllowed('indexabilityRules', userAgent, path);
   }
-  function isDisallowAll(userAgent) {
-    var group = findGroup(userAgent);
+  /**
+   * @param {string} userAgent
+   * @returns {boolean}
+   */
+  function isDisallowAll (userAgent) {
+    const group = findGroup(userAgent);
     if (group) {
-      var allowRules = group.accessibilityRules.filter(function ({ pattern, allow }) {
+      const allowRules = group.accessibilityRules.filter(function ({ pattern, allow }) {
         return allow || pattern.specificity > 1;
@@ -104,0 +173,0 @@ });
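The typedefs above document both the config makeGuard consumes and the Guard object it returns. A minimal consumer sketch (my example, not code from this diff; the agent name and paths are illustrative, and the behavior matches the tests further down):

const makeGuard = require('robots-txt-guard');

const guard = makeGuard({
  groups: [{
    agents: ['*'],
    rules: [
      { rule: 'allow', path: '/' },
      { rule: 'disallow', path: '/private' },
      { rule: 'noindex', path: '/drafts' }
    ]
  }]
});

guard.isAllowed('anybot', '/private/photos'); // false: /private is more specific than /
guard.isAllowed('anybot', '/drafts');         // true: noindex does not block crawling
guard.isIndexable('anybot', '/drafts');       // false
guard.isDisallowAll('anybot');                // false: / is allowed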
lib/patterns.js
 'use strict';
+/**
+ * @typedef {{
+ *   specificity: number
+ *   test: (input: string) => boolean
+ * }} Pattern
+ */
 // translates a robots.txt glob patterns to regexes
-function escapeRegExp(regexString) {
+/**
+ * @param {string} regexString
+ * @returns {string}
+ */
+function escapeRegExp (regexString) {
   return regexString
-    .replace(/[\*\/\-\[\]\{\}\(\)\+\?\.\,\\\^\$\|\#]/g, '\\$&');
+    .replace(/[*/\-[\]{}()+?.,\\^$|#]/g, '\\$&');
 }
-exports.path = function makePathPattern(pattern) {
-  var firstChar = pattern[0],
-    lastChar = pattern[pattern.length - 1],
-    matchEnd = lastChar === '$';
+/**
+ * @param {string} pattern
+ * @returns {Pattern}
+ */
+exports.path = function makePathPattern (pattern) {
+  const firstChar = pattern[0];
+  const lastChar = pattern[pattern.length - 1];
+  const matchEnd = lastChar === '$';
@@ -23,6 +38,6 @@ if (firstChar !== '/') {
   // wildcards are ignored in specificity
-  var specificityString = pattern.replace(/\*/g, '');
+  const specificityString = pattern.replace(/\*/g, '');
   pattern = pattern
-    .split('*')
+    .split(/\*+/)
     .map(escapeRegExp)
@@ -36,5 +51,9 @@ .join('(?:.*)');
-  var regexp = new RegExp(pattern);
+  const regexp = new RegExp(pattern);
-  function test(path) {
+  /**
+   * @param {string} path
+   * @returns {boolean}
+   */
+  function test (path) {
     return regexp.test(path);
@@ -49,7 +68,14 @@ }
-function alwaysTrue() {
+/**
+ * @returns {true}
+ */
+function alwaysTrue () {
   return true;
 }
-exports.userAgent = function makeUserAgentPattern(pattern) {
+/**
+ * @param {string} pattern
+ * @returns {Pattern}
+ */
+exports.userAgent = function makeUserAgentPattern (pattern) {
   if (pattern === '*') {
@@ -62,9 +88,13 @@ return {
-  var specificityString = pattern;
+  const specificityString = pattern;
   pattern = escapeRegExp(pattern);
-  var regexp = new RegExp(pattern, 'i');
+  const regexp = new RegExp(pattern, 'i');
-  function test(path) {
+  /**
+   * @param {string} path
+   * @returns {boolean}
+   */
+  function test (path) {
     return regexp.test(path);
@@ -71,0 +101,0 @@ }
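Two changes here are worth calling out. The simplified character class in escapeRegExp is behavior-neutral, but `.split('*')` becoming `.split(/\*+/)` is a real fix: a run of consecutive wildcards used to compile into one stacked `(?:.*)` group per `*`, and stacked `(?:.*)` groups backtrack badly on long inputs (the ReDoS test added below exercises exactly this). A sketch of the difference, with the helper inlined (illustrative, not the library's exact compilation pipeline):

const escape = (s) => s.replace(/[*/\-[\]{}()+?.,\\^$|#]/g, '\\$&');
const glob = '/*****.js';

// Old: one '(?:.*)' per '*', stacked back to back.
const before = glob.split('*').map(escape).join('(?:.*)');

// New: a whole run of '*' collapses to a single '(?:.*)'.
const after = glob.split(/\*+/).map(escape).join('(?:.*)');

new RegExp(after).test('/hello.js'); // true, without the backtracking blow-up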
package.json
 {
   "name": "robots-txt-guard",
-  "version": "0.2.1",
+  "version": "1.0.0",
   "description": "Validate urls against robots.txt rules.",
@@ -11,5 +11,9 @@ "main": "lib/guard.js",
   "scripts": {
+    "test:ci": "npm run lint && npm run typescript && npm run test",
     "test": "mocha -R spec ./test",
     "test-watch": "mocha -w -R spec ./test",
-    "version": "echo $npm_package_version"
+    "version": "echo $npm_package_version",
+    "typescript": "tsc",
+    "lint": "semistandard",
+    "fix": "semistandard --fix"
   },
@@ -19,6 +23,8 @@ "author": "Jan Potoms",
   "devDependencies": {
-    "chai": "^1.9.1",
-    "mocha": "^2.0.1"
+    "chai": "^2.3.0",
+    "mocha": "^8.2.1",
+    "semistandard": "^16.0.0",
+    "typescript": "^4.1.3"
   },
   "dependencies": {}
 }
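The new scripts wire type checking into CI: `npm run typescript` runs tsc over the JSDoc annotations added in lib/, and `lint` enforces the semistandard style the code was reformatted to. The tsconfig.json this implies is not shown in this comparison; a minimal config that would type-check JSDoc-annotated JavaScript like this (my assumption, not the package's actual file) could look like:

{
  "compilerOptions": {
    "allowJs": true,
    "checkJs": true,
    "noEmit": true
  },
  "include": ["lib", "test"]
}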
test/guard.js
@@ -1,18 +1,14 @@
-/*global describe, it*/
+/* global describe, it */
 'use strict';
-var guard = require('../lib/guard'),
-  assert = require('chai').assert;
+const guard = require('../lib/guard');
+const assert = require('chai').assert;
 describe('guard', function () {
   it('should pick most specific rule', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ 'agent1' ],
+        agents: ['agent1'],
         rules: [
@@ -23,3 +19,3 @@ { rule: 'allow', path: '/' },
       }, {
-        agents: [ 'agent2' ],
+        agents: ['agent2'],
         rules: [
@@ -43,7 +39,6 @@ { rule: 'disallow', path: '/fish' },
   it('allow should get priority', function () {
     // all groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ 'agent1' ],
+        agents: ['agent1'],
         rules: [
@@ -54,3 +49,3 @@ { rule: 'allow', path: '/fish' },
       }, {
-        agents: [ 'agent2' ],
+        agents: ['agent2'],
         rules: [
@@ -61,3 +56,3 @@ { rule: 'disallow', path: '/fish' },
       }, {
-        agents: [ 'agent3' ],
+        agents: ['agent3'],
         rules: [
@@ -83,7 +78,6 @@ { rule: 'disallow', path: '/fish' },
   it('should have the correct behaviour when no / is added at the end of the path', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ 'agent1' ],
+        agents: ['agent1'],
         rules: [
@@ -94,3 +88,3 @@ { rule: 'allow', path: '/fish' },
       }, {
-        agents: [ 'agent2' ],
+        agents: ['agent2'],
         rules: [
@@ -119,7 +113,6 @@ { rule: 'disallow', path: '/fish' }
   it('should have the correct behaviour when / is added at the end of the path', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ 'agent1' ],
+        agents: ['agent1'],
         rules: [
@@ -130,3 +123,3 @@ { rule: 'allow', path: '/fish/' },
       }, {
-        agents: [ 'agent2' ],
+        agents: ['agent2'],
         rules: [
@@ -156,7 +149,6 @@ { rule: 'disallow', path: '/fish/' }
   it('noindex shouldn\'t interfere with allow', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ 'agent1' ],
+        agents: ['agent1'],
         rules: [
@@ -167,3 +159,3 @@ { rule: 'noindex', path: '/fish' },
       }, {
-        agents: [ 'agent2' ],
+        agents: ['agent2'],
         rules: [
@@ -174,3 +166,3 @@ { rule: 'disallow', path: '/fish' },
       }, {
-        agents: [ 'agent3' ],
+        agents: ['agent3'],
         rules: [
@@ -189,7 +181,6 @@ { rule: 'disallow', path: '/' },
   it('should pick most specific agent', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ 'agent', 'agent2' ],
+        agents: ['agent', 'agent2'],
         rules: [
@@ -199,3 +190,3 @@ { rule: 'disallow', path: '/disallow1' }
       }, {
-        agents: [ '*' ],
+        agents: ['*'],
         rules: [
@@ -205,3 +196,3 @@ { rule: 'disallow', path: '/disallow2' }
       }, {
-        agents: [ 'agent1' ],
+        agents: ['agent1'],
         rules: [
@@ -231,7 +222,6 @@ { rule: 'disallow', path: '/disallow3' }
   it('should pick most specific agent', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ '*' ],
+        agents: ['*'],
         rules: [
@@ -248,7 +238,6 @@ { rule: 'disallow', path: '' }
   it('should detect disallow all', function () {
     // both groups should behave the same, regardless of the order of the rules
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ '*' ],
+        agents: ['*'],
         rules: [
@@ -258,3 +247,3 @@ { rule: 'disallow', path: '/' }
       }, {
-        agents: [ 'googlebot' ],
+        agents: ['googlebot'],
         rules: [
@@ -272,5 +261,5 @@ { rule: 'disallow', path: '/' },
   it('should detect disallow all', function () {
-    var robotsTxt = guard({
+    const robotsTxt = guard({
       groups: [{
-        agents: [ '*' ],
+        agents: ['*'],
         rules: [
@@ -287,5 +276,5 @@ { rule: 'disallow', path: '/' },
   it('should detect that not all paths are disallowed when only disallowing specific paths', function () {
-    var robotsTxt = guard({
+    const robotsTxt = guard({
      groups: [{
-        agents: [ '*' ],
+        agents: ['*'],
         rules: [
@@ -301,8 +290,7 @@ { rule: 'disallow', path: '/fish' }
   it('should correctly detect if path is allowed with noindex', function () {
-    var robotsTxt = guard(
+    const robotsTxt = guard(
       {
         groups: [
           {
-            agents: [ '*' ],
+            agents: ['*'],
             rules: [
@@ -326,8 +314,7 @@ { rule: 'allow', path: '/path1' },
   it('should detect if path is indexable', function () {
-    var robotsTxt = guard(
+    const robotsTxt = guard(
       {
         groups: [
           {
-            agents: [ '*' ],
+            agents: ['*'],
             rules: [
@@ -344,3 +331,3 @@ { rule: 'allow', path: '/path1' },
           {
-            agents: [ 'googlebot' ],
+            agents: ['googlebot'],
             rules: [
@@ -367,3 +354,2 @@ { rule: 'disallow', path: '/path1' },
   });
 });
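These tests pin down robots.txt precedence as the library implements it: within a group, the most specific matching pattern wins, and when an allow and a disallow are equally specific, allow takes priority; the order of the rules never matters. A compact sketch of those two assertions (my example, mirroring the tests above; the agent name is illustrative):

const guard = require('../lib/guard');

const robotsTxt = guard({
  groups: [{
    agents: ['*'],
    rules: [
      { rule: 'disallow', path: '/fish' },
      { rule: 'allow', path: '/fish' },       // same specificity as the disallow: allow wins
      { rule: 'disallow', path: '/fish/cod' } // more specific: beats both
    ]
  }]
});

robotsTxt.isAllowed('anybot', '/fish/salmon'); // true  (allow wins the tie)
robotsTxt.isAllowed('anybot', '/fish/cod');    // false (most specific rule wins)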
test/patterns.js
@@ -1,7 +1,7 @@
-/*global describe, it*/
+/* global describe, it */
 'use strict';
-var patterns = require('../lib/patterns'),
-  assert = require('chai').assert;
+const patterns = require('../lib/patterns');
+const assert = require('chai').assert;
@@ -12,8 +12,7 @@ // cases from:
 describe('patterns', function () {
-  function assertMatch(pattern, string) {
+  function assertMatch (pattern, string) {
     assert.ok(pattern.test(string), string);
   }
-  function assertNoMatch(pattern, string) {
+  function assertNoMatch (pattern, string) {
     assert.notOk(pattern.test(string), string);
@@ -23,5 +22,4 @@ }
   describe('userAgent', function () {
     it('should match simple pattern', function () {
-      var pattern = patterns.userAgent('googlebot-news');
+      const pattern = patterns.userAgent('googlebot-news');
@@ -40,3 +38,3 @@ assert.strictEqual(pattern.specificity, 14);
     it('should match wildcard', function () {
-      var pattern = patterns.userAgent('*');
+      const pattern = patterns.userAgent('*');
@@ -51,9 +49,7 @@ assert.strictEqual(pattern.specificity, 0);
     });
   });
   describe('path', function () {
     it('should match simple pattern', function () {
-      var pattern = patterns.path('/fish');
+      const pattern = patterns.path('/fish');
@@ -73,5 +69,5 @@ assert.strictEqual(pattern.specificity, 5);
     });
     it('should match ending wildcard', function () {
-      var pattern = patterns.path('/fish*');
+      const pattern = patterns.path('/fish*');
@@ -92,5 +88,5 @@ assert.strictEqual(pattern.specificity, 5);
     });
     it('should match trailing slash', function () {
-      var pattern = patterns.path('/fish/');
+      const pattern = patterns.path('/fish/');
@@ -107,5 +103,5 @@ assert.strictEqual(pattern.specificity, 6);
     });
     it('should handle missing start slash', function () {
-      var pattern = patterns.path('fish/');
+      const pattern = patterns.path('fish/');
@@ -123,5 +119,5 @@ assert.strictEqual(pattern.specificity, 6);
     });
     it('should handle wildcards', function () {
-      var pattern = patterns.path('/*.php');
+      const pattern = patterns.path('/*.php');
@@ -140,5 +136,5 @@ assert.strictEqual(pattern.specificity, 5);
     });
     it('should handle end directive', function () {
-      var pattern = patterns.path('/*.php$');
+      const pattern = patterns.path('/*.php$');
@@ -155,5 +151,5 @@ assert.strictEqual(pattern.specificity, 5);
     });
     it('should handle wildcards in the middle', function () {
-      var pattern = patterns.path('/fish*.php');
+      const pattern = patterns.path('/fish*.php');
@@ -167,5 +163,8 @@ assert.strictEqual(pattern.specificity, 9);
     });
+    it('should handle a ReDos', function () {
+      const pattern = patterns.path('/*****************************************************************************.js$');
+      assertMatch(pattern, '/hello.js');
+    });
   });
 });
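The specificity numbers asserted throughout ('/fish' is 5, '/fish/' is 6, '/*.php' is 5, 'googlebot-news' is 14, '*' is 0) all follow from one rule in lib/patterns.js: specificity is the pattern's length after stripping wildcards. A quick check of that invariant:

'/fish'.replace(/\*/g, '').length;          // 5
'/fish/'.replace(/\*/g, '').length;         // 6
'/*.php'.replace(/\*/g, '').length;         // 5 — wildcards are ignored
'googlebot-news'.replace(/\*/g, '').length; // 14
'*'.replace(/\*/g, '').length;              // 0 — any named agent match beats the catch-all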
(One more file changed in this release; its diff is not rendered here.)
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges. (This applies to 0.2.1; 1.0.0 satisfies semver >=1.)
Found 1 instance in 1 package