sitemap-generator
Advanced tools
Comparing version 4.0.0 to 4.1.0
{ | ||
"name": "sitemap-generator", | ||
"version": "4.0.0", | ||
"version": "4.1.0", | ||
"description": "Easily create XML sitemaps for your website.", | ||
@@ -31,4 +31,4 @@ "homepage": "https://github.com/lgraubner/sitemap-generator", | ||
"cheerio": "^0.20.0", | ||
"lodash.assign": "^4.0.8", | ||
"lodash.forin": "^4.1.0", | ||
"lodash.assign": "^4.0.9", | ||
"lodash.forin": "^4.2.0", | ||
"robots": "^0.9.4", | ||
@@ -42,11 +42,15 @@ "simplecrawler": "^0.7.0", | ||
"license": "MIT", | ||
"pre-commit": ["precommit-msg", "lint"], | ||
"devDependencies": { | ||
"ava": "^0.14.0", | ||
"eslint": "^2.9.0", | ||
"eslint-config-graubnla": "^2.0.2", | ||
"lodash.isobject": "^3.0.2" | ||
"ava": "^0.15.2", | ||
"eslint": "^3.0.0", | ||
"eslint-config-graubnla": "^3.0.0", | ||
"lodash.isobject": "^3.0.2", | ||
"pre-commit": "^1.1.3" | ||
}, | ||
"scripts": { | ||
"test": "eslint SitemapGenerator.js && ava test/all.js" | ||
"lint": "eslint SitemapGenerator.js", | ||
"test": "npm run lint && ava test/all.js", | ||
"precommit-msg": "echo 'Pre-commit checks...' && exit 0" | ||
} | ||
} |
@@ -29,3 +29,3 @@ # Sitemap Generator | ||
The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also the crawler will not fetch URL's from a page if the robots meta tag with the value `nofollow` is present. The crawler is able to apply the `base` value to found links. | ||
The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also the crawler will not fetch URLs from a page if the robots meta tag with the value `nofollow` is present, and will ignore them completely if the `noindex` rule is present. The crawler is able to apply the `base` value to found links. | ||
@@ -32,0 +32,0 @@ The protocol can be omitted, if the domain uses `http` or redirects to `https` are set up. |
@@ -77,3 +77,8 @@ 'use strict'; | ||
// set initial port | ||
this.crawler.initialPort = parseInt(this.options.port); | ||
var port = parseInt(this.options.port); | ||
// set port to 443 if https is present, respect user options | ||
if (this.baseUrl.protocol === 'https:' && this.options.port === 80) { | ||
port = 443; | ||
} | ||
this.crawler.initialPort = port; | ||
@@ -114,2 +119,5 @@ // set initial path to subpage if provided | ||
// array with urls that are crawled but shouldn't be indexed | ||
this.crawler.noindex = []; | ||
// custom discover function | ||
@@ -174,2 +182,8 @@ this.crawler.discoverResources = this._discoverResources; | ||
var metaRobots = $('meta[name="robots"]'); | ||
// add to noindex for it later to be removed from the store before a sitemap is built | ||
if (metaRobots.length && /noindex/i.test(metaRobots.attr('content'))) { | ||
this.noindex.push(queueItem.url); | ||
} | ||
if (metaRobots.length && /nofollow/i.test(metaRobots.attr('content'))) { | ||
@@ -224,3 +238,15 @@ return []; | ||
var sitemap = null; | ||
if (this.store.found.length > 0) { | ||
// Remove urls with a robots meta tag 'noindex' before building the sitemap | ||
this.crawler.noindex.forEach(function (page) { | ||
var index = this.store.found.indexOf(page); | ||
if (index !== -1) { | ||
// remove url from found array | ||
var ignored = this.store.found.splice(index, 1)[0]; | ||
// add url to ignored url | ||
this.store.ignored.push(ignored); | ||
} | ||
}, this); | ||
// xml base | ||
@@ -227,0 +253,0 @@ var xml = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' }) |
@@ -10,4 +10,6 @@ /* eslint no-unused-vars:0 */ | ||
// start testserver | ||
test.before(function () { | ||
server.listen(port, localhost); | ||
test.cb.before(function (t) { | ||
server.listen(port, localhost, function () { | ||
t.end(); | ||
}); | ||
}); | ||
@@ -22,5 +24,7 @@ | ||
test.after(function () { | ||
test.cb.after(function (t) { | ||
// stop test server | ||
server.close(); | ||
server.close(function () { | ||
t.end(); | ||
}); | ||
}); |
@@ -41,2 +41,18 @@ /* eslint no-unused-vars:0 */ | ||
test.cb('should ignore pages with "noindex" rule', function (t) { | ||
t.plan(2); | ||
var generator = new SitemapGenerator(localhost, { | ||
port: port, | ||
}); | ||
generator.on('done', function (sitemap, store) { | ||
t.is(store.found.indexOf(buildUrl(localhost, port, '/noindex')), -1); | ||
t.not(store.ignored.indexOf(buildUrl(localhost, port, '/noindex')), -1); | ||
t.end(); | ||
}); | ||
generator.start(); | ||
}); | ||
test.cb('should restrict subsequent requests to given path', function (t) { | ||
@@ -43,0 +59,0 @@ t.plan(1); |
@@ -10,2 +10,3 @@ /* eslint-disable */ | ||
'<a href="/single">Single</a>', | ||
'<a href="/noindex">Noindex</a>', | ||
].join('\n')); | ||
@@ -145,2 +146,10 @@ res.end(); | ||
'/noindex': function (req, res) { | ||
res.writeHead(200, { 'Content-Type': 'text/html' }); | ||
res.write([ | ||
'<meta name="robots" content="noindex,follow">', | ||
].join('\n')); | ||
res.end(); | ||
}, | ||
'/robots.txt': function (req, res) { | ||
@@ -147,0 +156,0 @@ res.writeHead(200, { 'Content-Type': 'text/html' }); |
@@ -19,3 +19,3 @@ /* eslint no-unused-vars:0 */ | ||
// sitemap | ||
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF-8"\?>/, 'has xml header'); | ||
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header'); | ||
var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/; | ||
@@ -22,0 +22,0 @@ t.regex(sitemap, urlsRegex, 'has urlset property'); |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
30079
769
2
5
Updated lodash.assign@^4.0.9
Updated lodash.forin@^4.2.0