New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

sitemap-generator

Package Overview
Dependencies
Maintainers
1
Versions
61
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sitemap-generator - npm Package Compare versions

Comparing version 5.0.1 to 6.0.0

82

lib/SitemapGenerator.js

@@ -12,2 +12,3 @@ 'use strict';

var forIn = require('lodash.forin');
var chunk = require('lodash.chunk');

@@ -24,2 +25,46 @@ /**

/**
 * Render a collection of URLs as a sitemap `urlset` XML document.
 *
 * Each value in `urls` becomes one `url` element holding a single `loc` child.
 *
 * @param {Array} urls values to emit as `loc` entries
 * @return {String} XML markup
 */
function generateSitemap(urls) {
  // root element carrying the sitemap protocol namespace
  var document = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' });
  var urlset = document.att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');

  // one <url><loc>…</loc></url> entry per value
  forIn(urls, function (location) {
    var entry = urlset.ele('url');
    entry.ele({ loc: location });
  });

  // serialize the finished document
  return urlset.end({ pretty: true, indent: ' ', newline: '\n' });
}
/**
 * Create the XML markup for a sitemap index file.
 *
 * Emits `count` `sitemap` entries whose `loc` is `sitemap_part<N>.xml`
 * (N running from 1 to `count`) resolved against `baseUrl`.
 *
 * @param {String} baseUrl base the part file names are resolved against
 * @param {Number} count number of sitemap parts to reference
 * @return {String} XML markup
 */
function generateSitemapIndex(baseUrl, count) {
  // root element carrying the sitemap protocol namespace
  var index = xmlbuilder
    .create('sitemapindex', { version: '1.0', encoding: 'UTF-8' })
    .att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');

  // part numbering is 1-based
  for (var part = 1; part <= count; part += 1) {
    var entry = index.ele('sitemap');
    var fileName = 'sitemap_part' + part + '.xml';
    entry.ele({ loc: url.resolve(baseUrl, fileName) });
  }

  // serialize the finished document
  return index.end({ pretty: true, indent: ' ', newline: '\n' });
}
/**
* Generator object, handling the crawler and sitemap generation.

@@ -37,2 +82,4 @@ *

restrictToBasepath: false,
maxEntriesPerFile: 50000,
crawlerMaxDepth: 0,
};

@@ -85,2 +132,5 @@

// Set maxDepth if applicable
this.crawler.maxDepth = this.options.crawlerMaxDepth;
// decode responses

@@ -190,2 +240,8 @@ this.crawler.decodeResponses = true;

// exclude rel="nofollow" links
var rel = $(this).attr('rel');
if (/nofollow/i.test(rel)) {
return null;
}
// remove anchors

@@ -226,3 +282,3 @@ href = href.replace(/(#.*)$/, '');

SitemapGenerator.prototype._buildXML = function (callback) {
var sitemap = null;
var sitemaps = null;

@@ -241,20 +297,16 @@ if (this.store.found.length > 0 && this.store.found.length !== this.crawler.noindex.length) {

// xml base
var xml = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' })
.att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
var parts = chunk(this.store.found, this.options.maxEntriesPerFile);
sitemaps = parts.reduce(function (maps, part) {
maps.push(generateSitemap(part));
return maps;
}, []);
// add elements
forIn(this.store.found, function (foundURL) {
xml.ele('url')
.ele({
loc: foundURL,
});
});
// finish xml markup
sitemap = xml.end({ pretty: true, indent: ' ', newline: '\n' });
if (parts.length > 1) {
var baseUrl = this.baseUrl.protocol + '//' + this.baseUrl.hostname;
sitemaps.unshift(generateSitemapIndex(baseUrl, parts.length));
}
}
if (typeof callback === 'function') {
callback.call(this, sitemap);
callback.call(this, sitemaps);
}

@@ -261,0 +313,0 @@ };

13

package.json
{
"name": "sitemap-generator",
"version": "5.0.1",
"version": "6.0.0",
"description": "Easily create XML sitemaps for your website.",

@@ -32,5 +32,6 @@ "homepage": "https://github.com/lgraubner/sitemap-generator",

"lodash.assign": "^4.0.9",
"lodash.chunk": "4.2.0",
"lodash.forin": "^4.2.0",
"robots": "^0.9.4",
"simplecrawler": "1.0.3",
"robots": "^0.9.5",
"simplecrawler": "1.0.5",
"xmlbuilder": "^8.2.2"

@@ -47,6 +48,8 @@ },

"devDependencies": {
"ava": "^0.17.0",
"eslint": "^3.13.1",
"ava": "^0.18.2",
"eslint": "^3.16.1",
"eslint-config-graubnla": "^3.0.0",
"lodash.isarray": "4.0.0",
"lodash.isobject": "^3.0.2",
"lodash.isstring": "4.0.1",
"pre-commit": "^1.2.2"

@@ -53,0 +56,0 @@ },

@@ -21,4 +21,4 @@ # Sitemap Generator

// register event listeners
generator.on('done', function (sitemap) {
console.log(sitemap); // => prints xml sitemap
generator.on('done', function (sitemaps) {
console.log(sitemaps); // => array of generated sitemaps
});

@@ -40,2 +40,4 @@

stripQuerystring: true,
maxEntriesPerFile: 50000,
crawlerMaxDepth: 0,
});

@@ -60,2 +62,16 @@ ```

### maxEntriesPerFile
Type: `number`
Default: `50000`
Google limits the maximum number of URLs in one sitemap to 50000. If this limit is exceeded, sitemap-generator splits the URLs across multiple sitemaps. In that case, the first entry of the `sitemaps` array is a sitemap index file.
### crawlerMaxDepth
Type: `number`
Default: `0`
Defines a maximum distance from the original request at which resources will be fetched.
## Events

@@ -97,8 +113,8 @@

Triggered when the crawler finished and the sitemap is created. Passes the created XML markup as callback argument. The second argument provides an object containing found URL's, ignored URL's and faulty URL's.
Triggered when the crawler has finished and the sitemaps have been created. Passes the created sitemaps as the first callback argument. The second argument provides an object containing the found URLs, ignored URLs and faulty URLs.
```JavaScript
generator.on('done', function (sitemap, store) {
// do something with the sitemap, e.g. save as file
generator.on('done', function (sitemaps, store) {
// do something with the sitemaps, e.g. save as file
});
```

@@ -5,2 +5,4 @@ /* eslint no-unused-vars:0 */

var isObject = require('lodash.isobject');
var isString = require('lodash.isstring');
var isArray = require('lodash.isarray');
var baseUrl = require('./lib/constants').baseUrl;

@@ -19,6 +21,6 @@ var port = require('./lib/constants').port;

generator.on('fetch', function (status, url) {
t.is(typeof status, 'string', 'status is a string');
t.truthy(isString(status), 'status is a string');
t.regex(status, /(NOT FOUND|OK)/, 'is a valid status');
t.is(typeof url, 'string', 'url is a string');
t.truthy(isString(url), 'url is a string');
t.regex(url, /^https?:\/\//, 'is a valid url');

@@ -38,3 +40,3 @@

generator.on('ignore', function (url) {
t.is(typeof url, 'string', 'url is a string');
t.truthy(isString(url), 'url is a string');
t.regex(url, /^https?:\/\//, 'is a valid url');

@@ -53,5 +55,5 @@

generator.on('done', function (sitemap, store) {
generator.on('done', function (sitemaps, store) {
// sitemap
t.is(typeof sitemap, 'string', 'returns xml string');
t.truthy(isArray(sitemaps), 'returns array');

@@ -58,0 +60,0 @@ // store

@@ -16,4 +16,4 @@ /* eslint no-unused-vars:0 */

generator.on('done', function (sitemap, store) {
t.regex(sitemap, /[^img.jpg]/, 'does not contain img.jpg');
generator.on('done', function (sitemaps, store) {
t.regex(sitemaps[0], /[^img.jpg]/, 'does not contain img.jpg');
t.end();

@@ -20,0 +20,0 @@ });

@@ -17,3 +17,5 @@ /* eslint no-unused-vars:0 */

restrictToBasepath: false,
maxEntriesPerFile: 50000,
crawlerMaxDepth: 0,
}, 'objects are equal');
});

@@ -16,9 +16,9 @@ /* eslint no-unused-vars:0 */

generator.on('done', function (sitemap, store) {
generator.on('done', function (sitemaps, store) {
// sitemap
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header');
t.regex(sitemaps[0], /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header');
var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;
t.regex(sitemap, urlsRegex, 'has urlset property');
t.truthy(sitemap.match(/<url>(.|\n)+?<\/url>/g), 'contains url properties');
t.truthy(sitemap.match(/<loc>(.|\n)+?<\/loc>/g), 'contains loc properties');
t.regex(sitemaps[0], urlsRegex, 'has urlset property');
t.truthy(sitemaps[0].match(/<url>(.|\n)+?<\/url>/g), 'contains url properties');
t.truthy(sitemaps[0].match(/<loc>(.|\n)+?<\/loc>/g), 'contains loc properties');

@@ -35,4 +35,4 @@ t.end();

var generator = new SitemapGenerator('invalid');
generator.on('done', function (sitemap) {
t.is(sitemap, null, 'returns "null"');
generator.on('done', function (sitemaps) {
t.is(sitemaps, null, 'returns "null"');
t.end();

@@ -42,1 +42,15 @@ });

});
// With at most one entry allowed per file, the test expects the generator to
// hand back more than one sitemap, with a sitemapindex document first.
test.cb('should create multiple sitemaps', function (t) {
  t.plan(2);

  var options = { maxEntriesPerFile: 1 };
  var generator = new SitemapGenerator(buildUrl(baseUrl, port, ''), options);

  function onDone(sitemaps) {
    t.truthy(sitemaps.length > 1, 'creates more than 1 sitemap');
    t.regex(sitemaps[0], /sitemapindex/, 'creates sitemapindex file');
    t.end();
  }

  generator.on('done', onDone);
  generator.start();
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc