Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

robots-txt-guard

Package Overview
Dependencies
Maintainers
4
Versions
8
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

robots-txt-guard - npm Package Compare versions

Comparing version 0.2.1 to 1.0.0

tsconfig.json

97

lib/guard.js
'use strict';
var patterns = require('./patterns');
const patterns = require('./patterns');
/**
* @typedef {import('./patterns').Pattern} Pattern
* @typedef {{
* pattern: Pattern
* allow: boolean
* }} Rule
* @typedef {{
* pattern: Pattern
* accessibilityRules: Rule[]
* indexabilityRules: Rule[]
* }} RuleGroup
* @typedef {{
* rule: string
* path: string
* }} ConfigGroupRule
* @typedef {{
* agents: string[]
* rules: ConfigGroupRule[]
* }} ConfigGroup
* @typedef {{
* groups: ConfigGroup[]
* }} Config
* @typedef {{
* isAllowed: (userAgent: string, path: string) => boolean
* isDisallowAll: (userAgent: string) => boolean
* isIndexable: (userAgent: string, path: string) => boolean
* }} Guard
*/
/**
 * Comparator ordering entries by descending pattern specificity, so the
 * most specific rule or group sorts to the front of an array.
 * @param {{ pattern: { specificity: number} }} obj1
 * @param {{ pattern: { specificity: number} }} obj2
 * @returns {number} negative when obj1 is more specific, positive when obj2 is.
 */
function moreSpecificFirst (obj1, obj2) {
  const first = obj1.pattern.specificity;
  const second = obj2.pattern.specificity;
  return second - first;
}
module.exports = function makeGuard(config) {
var groups = [];
/**
* @param {Config} config
* @returns {Guard}
*/
module.exports = function makeGuard (config) {
/**
* @type {RuleGroup[]}
*/
const groups = [];

@@ -31,3 +71,3 @@ // flatten agents

return group;
}, [])
}, /** @type {{ rule: string, path: string }[]} */([]))
.map(({ rule, path }) => ({

@@ -59,3 +99,7 @@ pattern: patterns.path(path),

function findGroup(userAgent) {
/**
* @param {string} userAgent
* @returns {RuleGroup | null}
*/
function findGroup (userAgent) {
for (const group of groups) {

@@ -69,3 +113,8 @@ if (group.pattern.test(userAgent)) {

function matchRule(rules, path) {
/**
* @param {Rule[]} rules
* @param {string} path
* @returns {boolean}
*/
function matchRule (rules, path) {
for (const rule of rules) {

@@ -80,4 +129,10 @@ if (rule.pattern.test(path)) {

function isRuleSetAllowed(ruleSet, userAgent, path) {
var group = findGroup(userAgent);
/**
* @param {'accessibilityRules' | 'indexabilityRules'} ruleSet
* @param {string} userAgent
* @param {string} path
* @returns {boolean}
*/
function isRuleSetAllowed (ruleSet, userAgent, path) {
const group = findGroup(userAgent);
if (group) {

@@ -90,14 +145,28 @@ return matchRule(group[ruleSet], path);

/**
 * Reports whether `path` may be crawled by `userAgent` according to the
 * allow/disallow rules of the matched agent group.
 * @param {string} userAgent
 * @param {string} path
 * @returns {boolean}
 */
function isAllowed (userAgent, path) {
  const ruleSet = 'accessibilityRules';
  return isRuleSetAllowed(ruleSet, userAgent, path);
}
/**
 * Reports whether `path` may be indexed by `userAgent` according to the
 * noindex rules of the matched agent group.
 * @param {string} userAgent
 * @param {string} path
 * @returns {boolean}
 */
function isIndexable (userAgent, path) {
  const ruleSet = 'indexabilityRules';
  return isRuleSetAllowed(ruleSet, userAgent, path);
}
function isDisallowAll(userAgent) {
var group = findGroup(userAgent);
/**
* @param {string} userAgent
* @returns {boolean}
*/
function isDisallowAll (userAgent) {
const group = findGroup(userAgent);
if (group) {
var allowRules = group.accessibilityRules.filter(function ({ pattern, allow }) {
const allowRules = group.accessibilityRules.filter(function ({ pattern, allow }) {
return allow || pattern.specificity > 1;

@@ -104,0 +173,0 @@ });

60

lib/patterns.js
'use strict';
/**
* @typedef {{
* specificity: number
* test: (input: string) => boolean
* }} Pattern
*/
// translates a robots.txt glob patterns to regexes
/**
 * Backslash-escapes every character that is significant inside a RegExp,
 * so a robots.txt glob fragment can be embedded literally in a pattern.
 * @param {string} regexString
 * @returns {string} the input with RegExp metacharacters escaped.
 */
function escapeRegExp (regexString) {
  const specials = /[*/\-[\]{}()+?.,\\^$|#]/g;
  return regexString.replace(specials, (ch) => `\\${ch}`);
}
exports.path = function makePathPattern(pattern) {
var firstChar = pattern[0],
lastChar = pattern[pattern.length - 1],
matchEnd = lastChar === '$';
/**
* @param {string} pattern
* @returns {Pattern}
*/
exports.path = function makePathPattern (pattern) {
const firstChar = pattern[0];
const lastChar = pattern[pattern.length - 1];
const matchEnd = lastChar === '$';

@@ -23,6 +38,6 @@ if (firstChar !== '/') {

// wildcards are ignored in specificity
var specificityString = pattern.replace(/\*/g, '');
const specificityString = pattern.replace(/\*/g, '');
pattern = pattern
.split('*')
.split(/\*+/)
.map(escapeRegExp)

@@ -36,5 +51,9 @@ .join('(?:.*)');

var regexp = new RegExp(pattern);
const regexp = new RegExp(pattern);
function test(path) {
/**
* @param {string} path
* @returns {boolean}
*/
function test (path) {
return regexp.test(path);

@@ -49,7 +68,14 @@ }

/**
 * Constant-true matcher used as the `test` for the wildcard ('*')
 * user agent pattern, which matches every agent string.
 * @returns {true}
 */
function alwaysTrue () {
  return true;
}
exports.userAgent = function makeUserAgentPattern(pattern) {
/**
* @param {string} pattern
* @returns {Pattern}
*/
exports.userAgent = function makeUserAgentPattern (pattern) {
if (pattern === '*') {

@@ -62,9 +88,13 @@ return {

var specificityString = pattern;
const specificityString = pattern;
pattern = escapeRegExp(pattern);
var regexp = new RegExp(pattern, 'i');
const regexp = new RegExp(pattern, 'i');
function test(path) {
/**
* @param {string} path
* @returns {boolean}
*/
function test (path) {
return regexp.test(path);

@@ -71,0 +101,0 @@ }

{
"name": "robots-txt-guard",
"version": "0.2.1",
"version": "1.0.0",
"description": "Validate urls against robots.txt rules.",

@@ -11,5 +11,9 @@ "main": "lib/guard.js",

"scripts": {
"test:ci": "npm run lint && npm run typescript && npm run test",
"test": "mocha -R spec ./test",
"test-watch": "mocha -w -R spec ./test",
"version": "echo $npm_package_version"
"version": "echo $npm_package_version",
"typescript": "tsc",
"lint": "semistandard",
"fix": "semistandard --fix"
},

@@ -19,6 +23,8 @@ "author": "Jan Potoms",

"devDependencies": {
"chai": "^1.9.1",
"mocha": "^2.0.1"
"chai": "^2.3.0",
"mocha": "^8.2.1",
"semistandard": "^16.0.0",
"typescript": "^4.1.3"
},
"dependencies": {}
}

@@ -1,18 +0,14 @@

/*global describe, it*/
/* global describe, it */
'use strict';
var guard = require('../lib/guard'),
assert = require('chai').assert;
const guard = require('../lib/guard');
const assert = require('chai').assert;
describe('guard', function () {
it('should pick most specific rule', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ 'agent1' ],
agents: ['agent1'],
rules: [

@@ -23,3 +19,3 @@ { rule: 'allow', path: '/' },

}, {
agents: [ 'agent2' ],
agents: ['agent2'],
rules: [

@@ -43,7 +39,6 @@ { rule: 'disallow', path: '/fish' },

it('allow should get priority', function () {
// all groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ 'agent1' ],
agents: ['agent1'],
rules: [

@@ -54,3 +49,3 @@ { rule: 'allow', path: '/fish' },

}, {
agents: [ 'agent2' ],
agents: ['agent2'],
rules: [

@@ -61,3 +56,3 @@ { rule: 'disallow', path: '/fish' },

}, {
agents: [ 'agent3' ],
agents: ['agent3'],
rules: [

@@ -83,7 +78,6 @@ { rule: 'disallow', path: '/fish' },

it('should have the correct behaviour when no / is added at the end of the path', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ 'agent1' ],
agents: ['agent1'],
rules: [

@@ -94,3 +88,3 @@ { rule: 'allow', path: '/fish' },

}, {
agents: [ 'agent2' ],
agents: ['agent2'],
rules: [

@@ -119,7 +113,6 @@ { rule: 'disallow', path: '/fish' }

it('should have the correct behaviour when / is added at the end of the path', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ 'agent1' ],
agents: ['agent1'],
rules: [

@@ -130,3 +123,3 @@ { rule: 'allow', path: '/fish/' },

}, {
agents: [ 'agent2' ],
agents: ['agent2'],
rules: [

@@ -156,7 +149,6 @@ { rule: 'disallow', path: '/fish/' }

it('noindex shouldn\'t interfere with allow', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ 'agent1' ],
agents: ['agent1'],
rules: [

@@ -167,3 +159,3 @@ { rule: 'noindex', path: '/fish' },

}, {
agents: [ 'agent2' ],
agents: ['agent2'],
rules: [

@@ -174,3 +166,3 @@ { rule: 'disallow', path: '/fish' },

}, {
agents: [ 'agent3' ],
agents: ['agent3'],
rules: [

@@ -189,7 +181,6 @@ { rule: 'disallow', path: '/' },

it('should pick most specific agent', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ 'agent', 'agent2' ],
agents: ['agent', 'agent2'],
rules: [

@@ -199,3 +190,3 @@ { rule: 'disallow', path: '/disallow1' }

}, {
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -205,3 +196,3 @@ { rule: 'disallow', path: '/disallow2' }

}, {
agents: [ 'agent1' ],
agents: ['agent1'],
rules: [

@@ -231,7 +222,6 @@ { rule: 'disallow', path: '/disallow3' }

it('should pick most specific agent', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -248,7 +238,6 @@ { rule: 'disallow', path: '' }

it('should detect disallow all', function () {
// both groups should behave the same, regardless of the order of the rules
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -258,3 +247,3 @@ { rule: 'disallow', path: '/' }

}, {
agents: [ 'googlebot' ],
agents: ['googlebot'],
rules: [

@@ -272,5 +261,5 @@ { rule: 'disallow', path: '/' },

it('should detect disallow all', function () {
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -287,5 +276,5 @@ { rule: 'disallow', path: '/' },

it('should detect that not all paths are disallowed when only disallowing specific paths', function () {
var robotsTxt = guard({
const robotsTxt = guard({
groups: [{
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -301,8 +290,7 @@ { rule: 'disallow', path: '/fish' }

it('should correctly detect if path is allowed with noindex', function () {
var robotsTxt = guard(
const robotsTxt = guard(
{
groups: [
{
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -326,8 +314,7 @@ { rule: 'allow', path: '/path1' },

it('should detect if path is indexable', function () {
var robotsTxt = guard(
const robotsTxt = guard(
{
groups: [
{
agents: [ '*' ],
agents: ['*'],
rules: [

@@ -344,3 +331,3 @@ { rule: 'allow', path: '/path1' },

{
agents: [ 'googlebot' ],
agents: ['googlebot'],
rules: [

@@ -367,3 +354,2 @@ { rule: 'disallow', path: '/path1' },

});
});

@@ -1,7 +0,7 @@

/*global describe, it*/
/* global describe, it */
'use strict';
var patterns = require('../lib/patterns'),
assert = require('chai').assert;
const patterns = require('../lib/patterns');
const assert = require('chai').assert;

@@ -12,8 +12,7 @@ // cases from:

describe('patterns', function () {
function assertMatch(pattern, string) {
function assertMatch (pattern, string) {
assert.ok(pattern.test(string), string);
}
function assertNoMatch(pattern, string) {
function assertNoMatch (pattern, string) {
assert.notOk(pattern.test(string), string);

@@ -23,5 +22,4 @@ }

describe('userAgent', function () {
it('should match simple pattern', function () {
var pattern = patterns.userAgent('googlebot-news');
const pattern = patterns.userAgent('googlebot-news');

@@ -40,3 +38,3 @@ assert.strictEqual(pattern.specificity, 14);

it('should match wildcard', function () {
var pattern = patterns.userAgent('*');
const pattern = patterns.userAgent('*');

@@ -51,9 +49,7 @@ assert.strictEqual(pattern.specificity, 0);

});
});
describe('path', function () {
it('should match simple pattern', function () {
var pattern = patterns.path('/fish');
const pattern = patterns.path('/fish');

@@ -73,5 +69,5 @@ assert.strictEqual(pattern.specificity, 5);

});
it('should match ending wildcard', function () {
var pattern = patterns.path('/fish*');
const pattern = patterns.path('/fish*');

@@ -92,5 +88,5 @@ assert.strictEqual(pattern.specificity, 5);

});
it('should match trailing slash', function () {
var pattern = patterns.path('/fish/');
const pattern = patterns.path('/fish/');

@@ -107,5 +103,5 @@ assert.strictEqual(pattern.specificity, 6);

});
it('should handle missing start slash', function () {
var pattern = patterns.path('fish/');
const pattern = patterns.path('fish/');

@@ -123,5 +119,5 @@ assert.strictEqual(pattern.specificity, 6);

});
it('should handle wildcards', function () {
var pattern = patterns.path('/*.php');
const pattern = patterns.path('/*.php');

@@ -140,5 +136,5 @@ assert.strictEqual(pattern.specificity, 5);

});
it('should handle end directive', function () {
var pattern = patterns.path('/*.php$');
const pattern = patterns.path('/*.php$');

@@ -155,5 +151,5 @@ assert.strictEqual(pattern.specificity, 5);

});
it('should handle wildcards in the middle', function () {
var pattern = patterns.path('/fish*.php');
const pattern = patterns.path('/fish*.php');

@@ -167,5 +163,8 @@ assert.strictEqual(pattern.specificity, 9);

});
it('should handle a ReDos', function () {
const pattern = patterns.path('/*****************************************************************************.js$');
assertMatch(pattern, '/hello.js');
});
});
});

Sorry, the diff of this file is not supported yet

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc