robots-txt-parse
Advanced tools
Comparing version 1.0.1 to 2.0.0
172
lib/parse.js
'use strict'; | ||
var split = require('split'), | ||
through = require('through'), | ||
combine = require('stream-combiner'), | ||
Promise = require('bluebird'); | ||
const split = require('split'); | ||
const { pipeline, Transform } = require('stream'); | ||
const START_GROUP = 'START_GROUP'; | ||
const GROUP_MEMBER = 'GROUP_MEMBER'; | ||
const NON_GROUP = 'NON_GROUP'; | ||
/** | ||
* @typedef {{ | ||
* type: START_GROUP | ||
* agent: string | ||
* } | { | ||
* type: GROUP_MEMBER | ||
* rule: string | ||
* path: string | ||
* } | { | ||
* type: NON_GROUP | ||
* field: string | ||
* value: string | ||
* }} Token | ||
*/ | ||
/** | ||
* @typedef {{ | ||
* rule: string | ||
* path: string | ||
* }} GroupRule | ||
* @typedef {{ | ||
* agents: string[] | ||
* rules: GroupRule[] | ||
* }} Group | ||
* @typedef {{ | ||
* extension: string | ||
* value: string | ||
*}} Extension | ||
* @typedef {{ | ||
* groups: Group[] | ||
* extensions: Extension[] | ||
* }} ParseResult | ||
*/ | ||
var START_GROUP = 'START_GROUP', | ||
GROUP_MEMBER = 'GROUP_MEMBER', | ||
NON_GROUP = 'NON_GROUP'; | ||
/** | ||
* @param {string} line | ||
* @returns {Token | null} | ||
*/ | ||
function parseLine (line) { | ||
const commentFree = line.replace(/#.*$/, ''); | ||
const index = commentFree.indexOf(':'); | ||
function parseLine(line) { | ||
var commentFree = line.replace(/#.*$/, ''), | ||
index = commentFree.indexOf(':'); | ||
if (index === -1) return null; | ||
if(index === -1) return null; | ||
const field = commentFree.substr(0, index).trim().toLowerCase(); | ||
const value = commentFree.substr(index + 1).trim(); | ||
var field = commentFree.substr(0,index).trim().toLowerCase(), | ||
value = commentFree.substr(index+1).trim(); | ||
switch (field) { | ||
case 'user-agent': | ||
return { | ||
type : START_GROUP, | ||
type: START_GROUP, | ||
agent: value | ||
@@ -40,3 +73,3 @@ }; | ||
return { | ||
type : NON_GROUP, | ||
type: NON_GROUP, | ||
field: field, | ||
@@ -48,8 +81,11 @@ value: value | ||
function tokenize() { | ||
return through(function (line) { | ||
var token = parseLine(line); | ||
if (token) { | ||
this.queue(token); | ||
function tokenize () { | ||
return new Transform({ | ||
readableObjectMode: true, | ||
transform (line, encoding, callback) { | ||
const token = parseLine(String(line)); | ||
if (token) { | ||
this.push(token); | ||
} | ||
callback(); | ||
} | ||
@@ -59,5 +95,9 @@ }); | ||
module.exports = function parse(content) { | ||
var result = { | ||
/** | ||
* @param {import('stream').Readable} content | ||
* @returns {Promise<ParseResult>} | ||
*/ | ||
module.exports = function parse (content) { | ||
/** @type {ParseResult} */ | ||
const result = { | ||
groups: [], | ||
@@ -67,50 +107,58 @@ extensions: [] | ||
var prevToken = null, | ||
currentGroup = null; | ||
/** @type {string | null} */ | ||
let prevToken = null; | ||
/** @type {Group | null} */ | ||
let currentGroup = null; | ||
var build = through(function (token) { | ||
switch (token.type) { | ||
case START_GROUP: | ||
if (prevToken !== START_GROUP) { | ||
currentGroup = { | ||
agents : [], | ||
rules : [] | ||
}; | ||
result.groups.push(currentGroup); | ||
} | ||
currentGroup.agents.push(token.agent); | ||
break; | ||
case GROUP_MEMBER: | ||
if (currentGroup) { | ||
currentGroup.rules.push({ | ||
rule: token.rule, | ||
path: token.path | ||
const build = new Transform({ | ||
objectMode: true, | ||
transform (token, encoding, callback) { | ||
switch (token.type) { | ||
case START_GROUP: | ||
if (prevToken !== START_GROUP) { | ||
currentGroup = { | ||
agents: [], | ||
rules: [] | ||
}; | ||
result.groups.push(currentGroup); | ||
} | ||
if (currentGroup) { | ||
currentGroup.agents.push(token.agent); | ||
} | ||
break; | ||
case GROUP_MEMBER: | ||
if (currentGroup) { | ||
currentGroup.rules.push({ | ||
rule: token.rule, | ||
path: token.path | ||
}); | ||
} | ||
break; | ||
case NON_GROUP: | ||
result.extensions.push({ | ||
extension: token.field, | ||
value: token.value | ||
}); | ||
} | ||
break; | ||
case NON_GROUP: | ||
result.extensions.push({ | ||
extension: token.field, | ||
value: token.value | ||
}); | ||
break; | ||
break; | ||
} | ||
prevToken = token.type; | ||
callback(); | ||
} | ||
prevToken = token.type; | ||
}); | ||
return new Promise(function (resolve, reject) { | ||
combine( | ||
pipeline( | ||
content, | ||
split(), | ||
tokenize(), | ||
build | ||
) | ||
.on('error', reject) | ||
.on('end', function () { | ||
build, | ||
(err) => { | ||
if (err) { | ||
return reject(err); | ||
} | ||
resolve(result); | ||
}); | ||
} | ||
); | ||
}); | ||
}; | ||
{ | ||
"name": "robots-txt-parse", | ||
"version": "1.0.1", | ||
"version": "2.0.0", | ||
"description": "Streaming parser for robots.txt files", | ||
@@ -13,3 +13,7 @@ "main": "lib/parse.js", | ||
"test-watch": "mocha -w -R spec ./test", | ||
"version": "echo $npm_package_version" | ||
"version": "echo $npm_package_version", | ||
"test:ci": "npm run lint && npm run typescript && npm run test", | ||
"typescript": "tsc", | ||
"lint": "semistandard", | ||
"fix": "semistandard --fix" | ||
}, | ||
@@ -19,11 +23,13 @@ "author": "Jan Potoms", | ||
"devDependencies": { | ||
"chai": "^1.9.1", | ||
"mocha": "^1.18.2" | ||
"@types/node": "^14.14.31", | ||
"@types/split": "^1.0.0", | ||
"@types/through": "0.0.30", | ||
"chai": "^4.3.0", | ||
"mocha": "^8.3.0", | ||
"semistandard": "^16.0.0", | ||
"typescript": "^4.1.5" | ||
}, | ||
"dependencies": { | ||
"bluebird": "^2.3.5", | ||
"split": "^0.3.0", | ||
"stream-combiner": "^0.2.1", | ||
"through": "^2.3.4" | ||
"split": "^1.0.1" | ||
} | ||
} |
@@ -1,128 +0,96 @@ | ||
/*global describe, it*/ | ||
/* global describe, it */ | ||
'use strict'; | ||
var parse = require('../lib/parse'), | ||
fs = require('fs'), | ||
path = require('path'), | ||
assert = require('chai').assert; | ||
const parse = require('../lib/parse'); | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const assert = require('chai').assert; | ||
function getFixture(name) { | ||
var fixturePath = path.resolve(__dirname, 'fixtures', name + '.txt'), | ||
stream = fs.createReadStream(fixturePath); | ||
function getFixture (name) { | ||
const fixturePath = path.resolve(__dirname, 'fixtures', name + '.txt'); | ||
const stream = fs.createReadStream(fixturePath); | ||
return stream; | ||
} | ||
describe('parser', function () { | ||
it('should parse a simple group', async function () { | ||
const parsed = await parse(getFixture('single-group')); | ||
assert.isObject(parsed); | ||
assert.property(parsed, 'groups'); | ||
assert.isArray(parsed.groups); | ||
assert.lengthOf(parsed.groups, 1); | ||
const group = parsed.groups[0]; | ||
assert.isObject(group); | ||
it('should parse a simple group', function (done) { | ||
parse(getFixture('single-group')) | ||
.then(function (parsed) { | ||
assert.isObject(parsed); | ||
assert.property(parsed, 'groups'); | ||
assert.isArray(parsed.groups); | ||
assert.lengthOf(parsed.groups, 1); | ||
var group = parsed.groups[0]; | ||
assert.isObject(group); | ||
assert.property(group, 'agents'); | ||
assert.isArray(group.agents); | ||
assert.lengthOf(group.agents, 1); | ||
assert.strictEqual(group.agents[0], '*'); | ||
assert.property(group, 'agents'); | ||
assert.isArray(group.agents); | ||
assert.lengthOf(group.agents, 1); | ||
assert.strictEqual(group.agents[0], '*'); | ||
assert.property(group, 'rules'); | ||
assert.isArray(group.rules); | ||
assert.lengthOf(group.rules, 1); | ||
const rule = group.rules[0]; | ||
assert.property(group, 'rules'); | ||
assert.isArray(group.rules); | ||
assert.lengthOf(group.rules, 1); | ||
var rule = group.rules[0]; | ||
assert.isObject(rule); | ||
assert.propertyVal(rule, 'rule', 'disallow'); | ||
assert.propertyVal(rule, 'path', '/'); | ||
done(); | ||
}) | ||
.catch(done); | ||
assert.isObject(rule); | ||
assert.propertyVal(rule, 'rule', 'disallow'); | ||
assert.propertyVal(rule, 'path', '/'); | ||
}); | ||
it('should parse multiple agents', function (done) { | ||
parse(getFixture('multiple-agents')) | ||
.then(function (parsed) { | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[1]', 'agent1'); | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[2]', 'agent2'); | ||
done(); | ||
}) | ||
.catch(done); | ||
it('should parse multiple agents', async function () { | ||
const parsed = await parse(getFixture('multiple-agents')); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[1]', 'agent1'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[2]', 'agent2'); | ||
}); | ||
it('should ignore group members outside of a group', function (done) { | ||
parse(getFixture('member-outside')) | ||
.then(function (parsed) { | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.lengthOf(parsed.groups[0].agents, 1); | ||
done(); | ||
}) | ||
.catch(done); | ||
it('should ignore group members outside of a group', async function () { | ||
const parsed = await parse(getFixture('member-outside')); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.lengthOf(parsed.groups[0].agents, 1); | ||
}); | ||
it('should parse extensions', function (done) { | ||
parse(getFixture('with-sitemap')) | ||
.then(function (parsed) { | ||
assert.deepPropertyVal(parsed, 'extensions[0].extension', 'sitemap'); | ||
assert.deepPropertyVal(parsed, 'extensions[0].value', '/sitemap.xml'); | ||
assert.deepPropertyVal(parsed, 'extensions[1].extension', 'sitemap'); | ||
assert.deepPropertyVal(parsed, 'extensions[1].value', 'http://example.com/alt_sitemap.xml'); | ||
done(); | ||
}) | ||
.catch(done); | ||
it('should parse extensions', async function () { | ||
const parsed = await parse(getFixture('with-sitemap')); | ||
assert.nestedPropertyVal(parsed, 'extensions[0].extension', 'sitemap'); | ||
assert.nestedPropertyVal(parsed, 'extensions[0].value', '/sitemap.xml'); | ||
assert.nestedPropertyVal(parsed, 'extensions[1].extension', 'sitemap'); | ||
assert.nestedPropertyVal(parsed, 'extensions[1].value', 'http://example.com/alt_sitemap.xml'); | ||
}); | ||
it('should parse multiple groups', function (done) { | ||
parse(getFixture('multiple-groups')) | ||
.then(function (parsed) { | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[1]', 'agent1'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[0].rule', 'disallow'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[0].path', '/path1'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[1].rule', 'allow'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[1].path', '/path2'); | ||
it('should parse multiple groups', async function () { | ||
const parsed = await parse(getFixture('multiple-groups')); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[1]', 'agent1'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].rule', 'disallow'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].path', '/path1'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].rule', 'allow'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].path', '/path2'); | ||
assert.deepPropertyVal(parsed, 'groups[1].agents[0]', 'agent2'); | ||
assert.deepPropertyVal(parsed, 'groups[1].rules[0].rule', 'allow'); | ||
assert.deepPropertyVal(parsed, 'groups[1].rules[0].path', '/'); | ||
assert.nestedPropertyVal(parsed, 'groups[1].agents[0]', 'agent2'); | ||
assert.nestedPropertyVal(parsed, 'groups[1].rules[0].rule', 'allow'); | ||
assert.nestedPropertyVal(parsed, 'groups[1].rules[0].path', '/'); | ||
assert.deepPropertyVal(parsed, 'groups[2].agents[0]', 'agent3'); | ||
assert.deepPropertyVal(parsed, 'groups[2].rules[0].rule', 'disallow'); | ||
assert.deepPropertyVal(parsed, 'groups[2].rules[0].path', '/path3'); | ||
done(); | ||
}) | ||
.catch(done); | ||
assert.nestedPropertyVal(parsed, 'groups[2].agents[0]', 'agent3'); | ||
assert.nestedPropertyVal(parsed, 'groups[2].rules[0].rule', 'disallow'); | ||
assert.nestedPropertyVal(parsed, 'groups[2].rules[0].path', '/path3'); | ||
}); | ||
it('should parse noindex', function (done) { | ||
parse(getFixture('noindex')) | ||
.then(function (parsed) { | ||
assert.deepPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
it('should parse noindex', async function () { | ||
const parsed = await parse(getFixture('noindex')); | ||
assert.nestedPropertyVal(parsed, 'groups[0].agents[0]', '*'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[0].rule', 'allow'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[0].path', '/path1'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].rule', 'allow'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[0].path', '/path1'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[1].rule', 'disallow'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[1].path', '/*/path2/'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].rule', 'disallow'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[1].path', '/*/path2/'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[2].rule', 'noindex'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[2].path', '/*/path2/'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[2].rule', 'noindex'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[2].path', '/*/path2/'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[3].rule', 'noindex'); | ||
assert.deepPropertyVal(parsed, 'groups[0].rules[3].path', '/*/path3/'); | ||
done(); | ||
}) | ||
.catch(done); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[3].rule', 'noindex'); | ||
assert.nestedPropertyVal(parsed, 'groups[0].rules[3].path', '/*/path3/'); | ||
}); | ||
}); |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
12214
1
17
239
7
1
+ Added split@1.0.1 (transitive)
- Removed bluebird@^2.3.5
- Removed stream-combiner@^0.2.1
- Removed through@^2.3.4
- Removed bluebird@2.11.0 (transitive)
- Removed duplexer@0.1.2 (transitive)
- Removed split@0.3.3 (transitive)
- Removed stream-combiner@0.2.2 (transitive)
Updated split@^1.0.1