sitemap-generator
Comparing version 5.0.1 to 6.0.0
@@ -12,2 +12,3 @@ 'use strict';
 var forIn = require('lodash.forin');
+var chunk = require('lodash.chunk');
@@ -24,2 +25,46 @@ /**
+/**
+ * Builds XML markup for given URLs.
+ *
+ * @return {String} XML markup
+ */
+function generateSitemap(urls) {
+  // xml base
+  var xml = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' })
+    .att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
+  // add elements
+  forIn(urls, function (u) {
+    xml.ele('url')
+      .ele({
+        loc: u,
+      });
+  });
+  // finish xml markup
+  return xml.end({ pretty: true, indent: ' ', newline: '\n' });
+}
+/**
+ * Creates XML markup for a sitemap index file.
+ *
+ * @return {String} XML markup
+ */
+function generateSitemapIndex(baseUrl, count) {
+  // xml base
+  var xml = xmlbuilder.create('sitemapindex', { version: '1.0', encoding: 'UTF-8' })
+    .att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
+  var i;
+  for (i = 1; i <= count; i++) {
+    xml.ele('sitemap')
+      .ele({
+        loc: url.resolve(baseUrl, 'sitemap_part' + i + '.xml'),
+      });
+  }
+  // finish xml markup
+  return xml.end({ pretty: true, indent: ' ', newline: '\n' });
+}
 /**
  * Generator object, handling the crawler and sitemap generation.
@@ -37,2 +82,4 @@ *
   restrictToBasepath: false,
+  maxEntriesPerFile: 50000,
+  crawlerMaxDepth: 0,
 };
@@ -85,2 +132,5 @@
+  // Set maxDepth if applicable
+  this.crawler.maxDepth = this.options.crawlerMaxDepth;
   // decode responses
@@ -190,2 +240,8 @@ this.crawler.decodeResponses = true;
+    // exclude rel="nofollow" links
+    var rel = $(this).attr('rel');
+    if (/nofollow/i.test(rel)) {
+      return null;
+    }
     // remove anchors
@@ -226,3 +282,3 @@ href = href.replace(/(#.*)$/, '');
 SitemapGenerator.prototype._buildXML = function (callback) {
-  var sitemap = null;
+  var sitemaps = null;
@@ -241,20 +297,16 @@ if (this.store.found.length > 0 && this.store.found.length !== this.crawler.noindex.length) {
-    // xml base
-    var xml = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' })
-      .att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
+    var parts = chunk(this.store.found, this.options.maxEntriesPerFile);
+    sitemaps = parts.reduce(function (maps, part) {
+      maps.push(generateSitemap(part));
+      return maps;
+    }, []);
-    // add elements
-    forIn(this.store.found, function (foundURL) {
-      xml.ele('url')
-        .ele({
-          loc: foundURL,
-        });
-    });
-    // finish xml markup
-    sitemap = xml.end({ pretty: true, indent: ' ', newline: '\n' });
+    if (parts.length > 1) {
+      var baseUrl = this.baseUrl.protocol + '//' + this.baseUrl.hostname;
+      sitemaps.unshift(generateSitemapIndex(baseUrl, parts.length));
+    }
   }
   if (typeof callback === 'function') {
-    callback.call(this, sitemap);
+    callback.call(this, sitemaps);
   }
@@ -261,0 +313,0 @@ };
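For context, the sketch below is illustrative only and not part of the package: it mirrors what the reworked `_buildXML` does in 6.0.0. `lodash.chunk` splits the list of found URLs into parts of at most `maxEntriesPerFile` entries, one `<urlset>` document is generated per part, and when there is more than one part a `<sitemapindex>` referencing `sitemap_part<i>.xml` files is placed at the front of the array. The example URLs and values are assumptions.

```JavaScript
// Minimal sketch of the chunking behaviour introduced in 6.0.0.
// The URLs and the maxEntriesPerFile value are made up for illustration.
var chunk = require('lodash.chunk');

var found = [
  'http://example.com/',
  'http://example.com/about',
  'http://example.com/contact',
];

// Split the flat URL list into sitemap-sized parts (here: at most 2 per file).
var parts = chunk(found, 2);
// => [ ['http://example.com/', 'http://example.com/about'],
//      ['http://example.com/contact'] ]

// One <urlset> sitemap is built per part. Because parts.length > 1, the
// generator also prepends a <sitemapindex> whose <loc> entries point at
// http://example.com/sitemap_part1.xml and http://example.com/sitemap_part2.xml,
// so the done callback receives an array of three XML strings in total.
console.log(parts.length); // 2
```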
 {
   "name": "sitemap-generator",
-  "version": "5.0.1",
+  "version": "6.0.0",
   "description": "Easily create XML sitemaps for your website.",
@@ -32,5 +32,6 @@ "homepage": "https://github.com/lgraubner/sitemap-generator",
     "lodash.assign": "^4.0.9",
+    "lodash.chunk": "4.2.0",
     "lodash.forin": "^4.2.0",
-    "robots": "^0.9.4",
-    "simplecrawler": "1.0.3",
+    "robots": "^0.9.5",
+    "simplecrawler": "1.0.5",
     "xmlbuilder": "^8.2.2"
@@ -47,6 +48,8 @@ },
   "devDependencies": {
-    "ava": "^0.17.0",
-    "eslint": "^3.13.1",
+    "ava": "^0.18.2",
+    "eslint": "^3.16.1",
     "eslint-config-graubnla": "^3.0.0",
+    "lodash.isarray": "4.0.0",
     "lodash.isobject": "^3.0.2",
+    "lodash.isstring": "4.0.1",
     "pre-commit": "^1.2.2"
@@ -53,0 +56,0 @@ },
@@ -21,4 +21,4 @@ # Sitemap Generator
 // register event listeners
-generator.on('done', function (sitemap) {
-  console.log(sitemap); // => prints xml sitemap
+generator.on('done', function (sitemaps) {
+  console.log(sitemaps); // => array of generated sitemaps
 });
@@ -40,2 +40,4 @@
   stripQuerystring: true,
+  maxEntriesPerFile: 50000,
+  crawlerMaxDepth: 0,
 });
@@ -60,2 +62,16 @@ ```
+### maxEntriesPerFile
+
+Type: `number`
+Default: `50000`
+
+Google limits the number of URLs in a single sitemap to 50000. When this limit is reached, the sitemap-generator creates an additional sitemap. In that case the first entry of the `sitemaps` array is a sitemapindex file (see the usage sketch after the `done` example below).
+
+### crawlerMaxDepth
+
+Type: `number`
+Default: `0`
+
+Defines a maximum distance from the original request at which resources will be fetched.
 ## Events
@@ -97,8 +113,8 @@
-Triggered when the crawler finished and the sitemap is created. Passes the created XML markup as callback argument. The second argument provides an object containing found URL's, ignored URL's and faulty URL's.
+Triggered when the crawler has finished and the sitemaps are created. Passes the created sitemaps as callback argument. The second argument provides an object containing found, ignored and faulty URLs.
 ```JavaScript
-generator.on('done', function (sitemap, store) {
-  // do something with the sitemap, e.g. save as file
+generator.on('done', function (sitemaps, store) {
+  // do something with the sitemaps, e.g. save them as files
 });
 ```
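As a usage sketch (not taken from the README), this is one way a consumer could persist the array passed to `done`, configuring the two new options along the way. The output file names and the `fs`/`path` handling are assumptions made for illustration; the library itself does not write anything to disk.

```JavaScript
var fs = require('fs');
var path = require('path');
var SitemapGenerator = require('sitemap-generator');

// Hypothetical site and output directory, chosen for illustration only.
var generator = new SitemapGenerator('http://example.com', {
  maxEntriesPerFile: 50000, // split into several sitemaps beyond this size
  crawlerMaxDepth: 0,       // forwarded to the crawler's maxDepth option
});

generator.on('done', function (sitemaps) {
  if (sitemaps === null) {
    return; // nothing was found (e.g. invalid URL)
  }
  sitemaps.forEach(function (xml, i) {
    var name;
    if (sitemaps.length === 1) {
      name = 'sitemap.xml';               // single sitemap, no index file
    } else if (i === 0) {
      name = 'sitemap.xml';               // first entry is the sitemapindex
    } else {
      name = 'sitemap_part' + i + '.xml'; // matches the <loc> entries in the index
    }
    fs.writeFileSync(path.join(__dirname, name), xml);
  });
});

generator.start();
```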
@@ -5,2 +5,4 @@ /* eslint no-unused-vars:0 */
 var isObject = require('lodash.isobject');
+var isString = require('lodash.isstring');
+var isArray = require('lodash.isarray');
 var baseUrl = require('./lib/constants').baseUrl;
@@ -19,6 +21,6 @@ var port = require('./lib/constants').port;
 generator.on('fetch', function (status, url) {
-  t.is(typeof status, 'string', 'status is a string');
+  t.truthy(isString(status), 'status is a string');
   t.regex(status, /(NOT FOUND|OK)/, 'is a valid status');
-  t.is(typeof url, 'string', 'url is a string');
+  t.truthy(isString(url), 'url is a string');
   t.regex(url, /^https?:\/\//, 'is a valid url');
@@ -38,3 +40,3 @@
 generator.on('ignore', function (url) {
-  t.is(typeof url, 'string', 'url is a string');
+  t.truthy(isString(url), 'url is a string');
   t.regex(url, /^https?:\/\//, 'is a valid url');
@@ -53,5 +55,5 @@
-generator.on('done', function (sitemap, store) {
+generator.on('done', function (sitemaps, store) {
   // sitemap
-  t.is(typeof sitemap, 'string', 'returns xml string');
+  t.truthy(isArray(sitemaps), 'returns array');
@@ -58,0 +60,0 @@ // store
@@ -16,4 +16,4 @@ /* eslint no-unused-vars:0 */
-generator.on('done', function (sitemap, store) {
-  t.regex(sitemap, /[^img.jpg]/, 'does not contain img.jpg');
+generator.on('done', function (sitemaps, store) {
+  t.notRegex(sitemaps[0], /img\.jpg/, 'does not contain img.jpg');
   t.end();
@@ -20,0 +20,0 @@ });
@@ -17,3 +17,5 @@ /* eslint no-unused-vars:0 */
   restrictToBasepath: false,
+  maxEntriesPerFile: 50000,
+  crawlerMaxDepth: 0,
 }, 'objects are equal');
 });
@@ -16,9 +16,9 @@ /* eslint no-unused-vars:0 */
-generator.on('done', function (sitemap, store) {
+generator.on('done', function (sitemaps, store) {
   // sitemap
-  t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header');
+  t.regex(sitemaps[0], /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header');
   var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;
-  t.regex(sitemap, urlsRegex, 'has urlset property');
-  t.truthy(sitemap.match(/<url>(.|\n)+?<\/url>/g), 'contains url properties');
-  t.truthy(sitemap.match(/<loc>(.|\n)+?<\/loc>/g), 'contains loc properties');
+  t.regex(sitemaps[0], urlsRegex, 'has urlset property');
+  t.truthy(sitemaps[0].match(/<url>(.|\n)+?<\/url>/g), 'contains url properties');
+  t.truthy(sitemaps[0].match(/<loc>(.|\n)+?<\/loc>/g), 'contains loc properties');
@@ -35,4 +35,4 @@ t.end();
 var generator = new SitemapGenerator('invalid');
-generator.on('done', function (sitemap) {
-  t.is(sitemap, null, 'returns "null"');
+generator.on('done', function (sitemaps) {
+  t.is(sitemaps, null, 'returns "null"');
   t.end();
@@ -42,1 +42,15 @@ });
 });
+
+test.cb('should create multiple sitemaps', function (t) {
+  t.plan(2);
+
+  var generator = new SitemapGenerator(buildUrl(baseUrl, port, ''), {
+    maxEntriesPerFile: 1,
+  });
+
+  generator.on('done', function (sitemaps) {
+    t.truthy(sitemaps.length > 1, 'creates more than 1 sitemap');
+    t.regex(sitemaps[0], /sitemapindex/, 'creates sitemapindex file');
+    t.end();
+  });
+
+  generator.start();
+});
+ Added lodash.chunk@4.2.0
+ Added lodash.chunk@4.2.0 (transitive)
+ Added simplecrawler@1.0.5 (transitive)
- Removed simplecrawler@1.0.3 (transitive)
Updated robots@^0.9.5
Updated simplecrawler@1.0.5