sitemap-generator
Advanced tools
Comparing version 4.0.0 to 4.1.0
{ | ||
"name": "sitemap-generator", | ||
"version": "4.0.0", | ||
"version": "4.1.0", | ||
"description": "Easily create XML sitemaps for your website.", | ||
@@ -31,4 +31,4 @@ "homepage": "https://github.com/lgraubner/sitemap-generator", | ||
"cheerio": "^0.20.0", | ||
"lodash.assign": "^4.0.8", | ||
"lodash.forin": "^4.1.0", | ||
"lodash.assign": "^4.0.9", | ||
"lodash.forin": "^4.2.0", | ||
"robots": "^0.9.4", | ||
@@ -42,11 +42,15 @@ "simplecrawler": "^0.7.0", | ||
"license": "MIT", | ||
"pre-commit": ["precommit-msg", "lint"], | ||
"devDependencies": { | ||
"ava": "^0.14.0", | ||
"eslint": "^2.9.0", | ||
"eslint-config-graubnla": "^2.0.2", | ||
"lodash.isobject": "^3.0.2" | ||
"ava": "^0.15.2", | ||
"eslint": "^3.0.0", | ||
"eslint-config-graubnla": "^3.0.0", | ||
"lodash.isobject": "^3.0.2", | ||
"pre-commit": "^1.1.3" | ||
}, | ||
"scripts": { | ||
"test": "eslint SitemapGenerator.js && ava test/all.js" | ||
"lint": "eslint SitemapGenerator.js", | ||
"test": "npm run lint && ava test/all.js", | ||
"precommit-msg": "echo 'Pre-commit checks...' && exit 0" | ||
} | ||
} |
@@ -29,3 +29,3 @@ # Sitemap Generator | ||
The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also the crawler will not fetch URL's from a page if the robots meta tag with the value `nofollow` is present. The crawler is able to apply the `base` value to found links. | ||
The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also the crawler will not fetch URLs from a page if the robots meta tag with the value `nofollow` is present, and will ignore them completely if the `noindex` rule is present. The crawler is able to apply the `base` value to found links. | ||
@@ -32,0 +32,0 @@ The protocol can be omitted, if the domain uses `http` or redirects to `https` are set up. |
@@ -77,3 +77,8 @@ 'use strict'; | ||
// set initial port | ||
this.crawler.initialPort = parseInt(this.options.port); | ||
var port = parseInt(this.options.port); | ||
// set port to 443 if https is present, respect user options | ||
if (this.baseUrl.protocol === 'https:' && this.options.port === 80) { | ||
port = 443; | ||
} | ||
this.crawler.initialPort = port; | ||
@@ -114,2 +119,5 @@ // set initial path to subpage if provided | ||
// array with urls that are crawled but shouldn't be indexed | ||
this.crawler.noindex = []; | ||
// custom discover function | ||
@@ -174,2 +182,8 @@ this.crawler.discoverResources = this._discoverResources; | ||
var metaRobots = $('meta[name="robots"]'); | ||
// add to noindex for it later to be removed from the store before a sitemap is built | ||
if (metaRobots.length && /noindex/i.test(metaRobots.attr('content'))) { | ||
this.noindex.push(queueItem.url); | ||
} | ||
if (metaRobots.length && /nofollow/i.test(metaRobots.attr('content'))) { | ||
@@ -224,3 +238,15 @@ return []; | ||
var sitemap = null; | ||
if (this.store.found.length > 0) { | ||
// Remove urls with a robots meta tag 'noindex' before building the sitemap | ||
this.crawler.noindex.forEach(function (page) { | ||
var index = this.store.found.indexOf(page); | ||
if (index !== -1) { | ||
// remove url from found array | ||
var ignored = this.store.found.splice(index, 1)[0]; | ||
// add url to ignored url | ||
this.store.ignored.push(ignored); | ||
} | ||
}, this); | ||
// xml base | ||
@@ -227,0 +253,0 @@ var xml = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' }) |
@@ -10,4 +10,6 @@ /* eslint no-unused-vars:0 */ | ||
// start testserver | ||
test.before(function () { | ||
server.listen(port, localhost); | ||
test.cb.before(function (t) { | ||
server.listen(port, localhost, function () { | ||
t.end(); | ||
}); | ||
}); | ||
@@ -22,5 +24,7 @@ | ||
test.after(function () { | ||
test.cb.after(function (t) { | ||
// stop test server | ||
server.close(); | ||
server.close(function () { | ||
t.end(); | ||
}); | ||
}); |
@@ -41,2 +41,18 @@ /* eslint no-unused-vars:0 */ | ||
test.cb('should ignore pages with "noindex" rule', function (t) { | ||
t.plan(2); | ||
var generator = new SitemapGenerator(localhost, { | ||
port: port, | ||
}); | ||
generator.on('done', function (sitemap, store) { | ||
t.is(store.found.indexOf(buildUrl(localhost, port, '/noindex')), -1); | ||
t.not(store.ignored.indexOf(buildUrl(localhost, port, '/noindex')), -1); | ||
t.end(); | ||
}); | ||
generator.start(); | ||
}); | ||
test.cb('should restrict subsequent requests to given path', function (t) { | ||
@@ -43,0 +59,0 @@ t.plan(1); |
@@ -10,2 +10,3 @@ /* eslint-disable */ | ||
'<a href="/single">Single</a>', | ||
'<a href="/noindex">Noindex</a>', | ||
].join('\n')); | ||
@@ -145,2 +146,10 @@ res.end(); | ||
'/noindex': function (req, res) { | ||
res.writeHead(200, { 'Content-Type': 'text/html' }); | ||
res.write([ | ||
'<meta name="robots" content="noindex,follow">', | ||
].join('\n')); | ||
res.end(); | ||
}, | ||
'/robots.txt': function (req, res) { | ||
@@ -147,0 +156,0 @@ res.writeHead(200, { 'Content-Type': 'text/html' }); |
@@ -19,3 +19,3 @@ /* eslint no-unused-vars:0 */ | ||
// sitemap | ||
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF-8"\?>/, 'has xml header'); | ||
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header'); | ||
var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/; | ||
@@ -22,0 +22,0 @@ t.regex(sitemap, urlsRegex, 'has urlset property'); |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
30079
769
2
5
Updated lodash.assign@^4.0.9
Updated lodash.forin@^4.2.0