Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

robots-txt-parse

Package Overview
Dependencies
Maintainers
4
Versions
8
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

robots-txt-parse - npm Package Compare versions

Comparing version 1.0.1 to 2.0.0

CHANGELOG.md

172

lib/parse.js
'use strict';
var split = require('split'),
through = require('through'),
combine = require('stream-combiner'),
Promise = require('bluebird');
const split = require('split');
const { pipeline, Transform } = require('stream');
const START_GROUP = 'START_GROUP';
const GROUP_MEMBER = 'GROUP_MEMBER';
const NON_GROUP = 'NON_GROUP';
/**
* @typedef {{
* type: START_GROUP
* agent: string
* } | {
* type: GROUP_MEMBER
* rule: string
* path: string
* } | {
* type: NON_GROUP
* field: string
* value: string
* }} Token
*/
/**
* @typedef {{
* rule: string
* path: string
* }} GroupRule
* @typedef {{
* agents: string[]
* rules: GroupRule[]
* }} Group
* @typedef {{
* extension: string
* value: string
*}} Extension
* @typedef {{
* groups: Group[]
* extensions: Extension[]
* }} ParseResult
*/
var START_GROUP = 'START_GROUP',
GROUP_MEMBER = 'GROUP_MEMBER',
NON_GROUP = 'NON_GROUP';
/**
* @param {string} line
* @returns {Token | null}
*/
function parseLine (line) {
const commentFree = line.replace(/#.*$/, '');
const index = commentFree.indexOf(':');
function parseLine(line) {
var commentFree = line.replace(/#.*$/, ''),
index = commentFree.indexOf(':');
if (index === -1) return null;
if(index === -1) return null;
const field = commentFree.substr(0, index).trim().toLowerCase();
const value = commentFree.substr(index + 1).trim();
var field = commentFree.substr(0,index).trim().toLowerCase(),
value = commentFree.substr(index+1).trim();
switch (field) {
case 'user-agent':
return {
type : START_GROUP,
type: START_GROUP,
agent: value

@@ -40,3 +73,3 @@ };

return {
type : NON_GROUP,
type: NON_GROUP,
field: field,

@@ -48,8 +81,11 @@ value: value

function tokenize() {
return through(function (line) {
var token = parseLine(line);
if (token) {
this.queue(token);
function tokenize () {
return new Transform({
readableObjectMode: true,
transform (line, encoding, callback) {
const token = parseLine(String(line));
if (token) {
this.push(token);
}
callback();
}

@@ -59,5 +95,9 @@ });

module.exports = function parse(content) {
var result = {
/**
* @param {import('stream').Readable} content
* @returns {Promise<ParseResult>}
*/
module.exports = function parse (content) {
/** @type {ParseResult} */
const result = {
groups: [],

@@ -67,50 +107,58 @@ extensions: []

var prevToken = null,
currentGroup = null;
/** @type {string | null} */
let prevToken = null;
/** @type {Group | null} */
let currentGroup = null;
var build = through(function (token) {
switch (token.type) {
case START_GROUP:
if (prevToken !== START_GROUP) {
currentGroup = {
agents : [],
rules : []
};
result.groups.push(currentGroup);
}
currentGroup.agents.push(token.agent);
break;
case GROUP_MEMBER:
if (currentGroup) {
currentGroup.rules.push({
rule: token.rule,
path: token.path
const build = new Transform({
objectMode: true,
transform (token, encoding, callback) {
switch (token.type) {
case START_GROUP:
if (prevToken !== START_GROUP) {
currentGroup = {
agents: [],
rules: []
};
result.groups.push(currentGroup);
}
if (currentGroup) {
currentGroup.agents.push(token.agent);
}
break;
case GROUP_MEMBER:
if (currentGroup) {
currentGroup.rules.push({
rule: token.rule,
path: token.path
});
}
break;
case NON_GROUP:
result.extensions.push({
extension: token.field,
value: token.value
});
}
break;
case NON_GROUP:
result.extensions.push({
extension: token.field,
value: token.value
});
break;
break;
}
prevToken = token.type;
callback();
}
prevToken = token.type;
});
return new Promise(function (resolve, reject) {
combine(
pipeline(
content,
split(),
tokenize(),
build
)
.on('error', reject)
.on('end', function () {
build,
(err) => {
if (err) {
return reject(err);
}
resolve(result);
});
}
);
});
};
{
"name": "robots-txt-parse",
"version": "1.0.1",
"version": "2.0.0",
"description": "Streaming parser for robots.txt files",

@@ -13,3 +13,7 @@ "main": "lib/parse.js",

"test-watch": "mocha -w -R spec ./test",
"version": "echo $npm_package_version"
"version": "echo $npm_package_version",
"test:ci": "npm run lint && npm run typescript && npm run test",
"typescript": "tsc",
"lint": "semistandard",
"fix": "semistandard --fix"
},

@@ -19,11 +23,13 @@ "author": "Jan Potoms",

"devDependencies": {
"chai": "^1.9.1",
"mocha": "^1.18.2"
"@types/node": "^14.14.31",
"@types/split": "^1.0.0",
"@types/through": "0.0.30",
"chai": "^4.3.0",
"mocha": "^8.3.0",
"semistandard": "^16.0.0",
"typescript": "^4.1.5"
},
"dependencies": {
"bluebird": "^2.3.5",
"split": "^0.3.0",
"stream-combiner": "^0.2.1",
"through": "^2.3.4"
"split": "^1.0.1"
}
}

@@ -1,128 +0,96 @@

/*global describe, it*/
/* global describe, it */
'use strict';
var parse = require('../lib/parse'),
fs = require('fs'),
path = require('path'),
assert = require('chai').assert;
const parse = require('../lib/parse');
const fs = require('fs');
const path = require('path');
const assert = require('chai').assert;
function getFixture(name) {
var fixturePath = path.resolve(__dirname, 'fixtures', name + '.txt'),
stream = fs.createReadStream(fixturePath);
function getFixture (name) {
const fixturePath = path.resolve(__dirname, 'fixtures', name + '.txt');
const stream = fs.createReadStream(fixturePath);
return stream;
}
describe('parser', function () {
it('should parse a simple group', async function () {
const parsed = await parse(getFixture('single-group'));
assert.isObject(parsed);
assert.property(parsed, 'groups');
assert.isArray(parsed.groups);
assert.lengthOf(parsed.groups, 1);
const group = parsed.groups[0];
assert.isObject(group);
it('should parse a simple group', function (done) {
parse(getFixture('single-group'))
.then(function (parsed) {
assert.isObject(parsed);
assert.property(parsed, 'groups');
assert.isArray(parsed.groups);
assert.lengthOf(parsed.groups, 1);
var group = parsed.groups[0];
assert.isObject(group);
assert.property(group, 'agents');
assert.isArray(group.agents);
assert.lengthOf(group.agents, 1);
assert.strictEqual(group.agents[0], '*');
assert.property(group, 'agents');
assert.isArray(group.agents);
assert.lengthOf(group.agents, 1);
assert.strictEqual(group.agents[0], '*');
assert.property(group, 'rules');
assert.isArray(group.rules);
assert.lengthOf(group.rules, 1);
const rule = group.rules[0];
assert.property(group, 'rules');
assert.isArray(group.rules);
assert.lengthOf(group.rules, 1);
var rule = group.rules[0];
assert.isObject(rule);
assert.propertyVal(rule, 'rule', 'disallow');
assert.propertyVal(rule, 'path', '/');
done();
})
.catch(done);
assert.isObject(rule);
assert.propertyVal(rule, 'rule', 'disallow');
assert.propertyVal(rule, 'path', '/');
});
it('should parse multiple agents', function (done) {
parse(getFixture('multiple-agents'))
.then(function (parsed) {
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.deepPropertyVal(parsed, 'groups[0].agents[1]', 'agent1');
assert.deepPropertyVal(parsed, 'groups[0].agents[2]', 'agent2');
done();
})
.catch(done);
it('should parse multiple agents', async function () {
const parsed = await parse(getFixture('multiple-agents'));
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.nestedPropertyVal(parsed, 'groups[0].agents[1]', 'agent1');
assert.nestedPropertyVal(parsed, 'groups[0].agents[2]', 'agent2');
});
it('should ignore group members outside of a group', function (done) {
parse(getFixture('member-outside'))
.then(function (parsed) {
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.lengthOf(parsed.groups[0].agents, 1);
done();
})
.catch(done);
it('should ignore group members outside of a group', async function () {
const parsed = await parse(getFixture('member-outside'));
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.lengthOf(parsed.groups[0].agents, 1);
});
it('should parse extensions', function (done) {
parse(getFixture('with-sitemap'))
.then(function (parsed) {
assert.deepPropertyVal(parsed, 'extensions[0].extension', 'sitemap');
assert.deepPropertyVal(parsed, 'extensions[0].value', '/sitemap.xml');
assert.deepPropertyVal(parsed, 'extensions[1].extension', 'sitemap');
assert.deepPropertyVal(parsed, 'extensions[1].value', 'http://example.com/alt_sitemap.xml');
done();
})
.catch(done);
it('should parse extensions', async function () {
const parsed = await parse(getFixture('with-sitemap'));
assert.nestedPropertyVal(parsed, 'extensions[0].extension', 'sitemap');
assert.nestedPropertyVal(parsed, 'extensions[0].value', '/sitemap.xml');
assert.nestedPropertyVal(parsed, 'extensions[1].extension', 'sitemap');
assert.nestedPropertyVal(parsed, 'extensions[1].value', 'http://example.com/alt_sitemap.xml');
});
it('should parse multiple groups', function (done) {
parse(getFixture('multiple-groups'))
.then(function (parsed) {
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.deepPropertyVal(parsed, 'groups[0].agents[1]', 'agent1');
assert.deepPropertyVal(parsed, 'groups[0].rules[0].rule', 'disallow');
assert.deepPropertyVal(parsed, 'groups[0].rules[0].path', '/path1');
assert.deepPropertyVal(parsed, 'groups[0].rules[1].rule', 'allow');
assert.deepPropertyVal(parsed, 'groups[0].rules[1].path', '/path2');
it('should parse multiple groups', async function () {
const parsed = await parse(getFixture('multiple-groups'));
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.nestedPropertyVal(parsed, 'groups[0].agents[1]', 'agent1');
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].rule', 'disallow');
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].path', '/path1');
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].rule', 'allow');
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].path', '/path2');
assert.deepPropertyVal(parsed, 'groups[1].agents[0]', 'agent2');
assert.deepPropertyVal(parsed, 'groups[1].rules[0].rule', 'allow');
assert.deepPropertyVal(parsed, 'groups[1].rules[0].path', '/');
assert.nestedPropertyVal(parsed, 'groups[1].agents[0]', 'agent2');
assert.nestedPropertyVal(parsed, 'groups[1].rules[0].rule', 'allow');
assert.nestedPropertyVal(parsed, 'groups[1].rules[0].path', '/');
assert.deepPropertyVal(parsed, 'groups[2].agents[0]', 'agent3');
assert.deepPropertyVal(parsed, 'groups[2].rules[0].rule', 'disallow');
assert.deepPropertyVal(parsed, 'groups[2].rules[0].path', '/path3');
done();
})
.catch(done);
assert.nestedPropertyVal(parsed, 'groups[2].agents[0]', 'agent3');
assert.nestedPropertyVal(parsed, 'groups[2].rules[0].rule', 'disallow');
assert.nestedPropertyVal(parsed, 'groups[2].rules[0].path', '/path3');
});
it('should parse noindex', function (done) {
parse(getFixture('noindex'))
.then(function (parsed) {
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*');
it('should parse noindex', async function () {
const parsed = await parse(getFixture('noindex'));
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*');
assert.deepPropertyVal(parsed, 'groups[0].rules[0].rule', 'allow');
assert.deepPropertyVal(parsed, 'groups[0].rules[0].path', '/path1');
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].rule', 'allow');
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].path', '/path1');
assert.deepPropertyVal(parsed, 'groups[0].rules[1].rule', 'disallow');
assert.deepPropertyVal(parsed, 'groups[0].rules[1].path', '/*/path2/');
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].rule', 'disallow');
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].path', '/*/path2/');
assert.deepPropertyVal(parsed, 'groups[0].rules[2].rule', 'noindex');
assert.deepPropertyVal(parsed, 'groups[0].rules[2].path', '/*/path2/');
assert.nestedPropertyVal(parsed, 'groups[0].rules[2].rule', 'noindex');
assert.nestedPropertyVal(parsed, 'groups[0].rules[2].path', '/*/path2/');
assert.deepPropertyVal(parsed, 'groups[0].rules[3].rule', 'noindex');
assert.deepPropertyVal(parsed, 'groups[0].rules[3].path', '/*/path3/');
done();
})
.catch(done);
assert.nestedPropertyVal(parsed, 'groups[0].rules[3].rule', 'noindex');
assert.nestedPropertyVal(parsed, 'groups[0].rules[3].path', '/*/path3/');
});
});

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc