New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in · Demo · Install
Socket

sitemap-generator

Package Overview
Dependencies
Maintainers
1
Versions
61
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sitemap-generator - npm Package Compare versions

Comparing version 4.0.0 to 4.1.0

20

package.json
{
"name": "sitemap-generator",
"version": "4.0.0",
"version": "4.1.0",
"description": "Easily create XML sitemaps for your website.",

@@ -31,4 +31,4 @@ "homepage": "https://github.com/lgraubner/sitemap-generator",

"cheerio": "^0.20.0",
"lodash.assign": "^4.0.8",
"lodash.forin": "^4.1.0",
"lodash.assign": "^4.0.9",
"lodash.forin": "^4.2.0",
"robots": "^0.9.4",

@@ -42,11 +42,15 @@ "simplecrawler": "^0.7.0",

"license": "MIT",
"pre-commit": ["precommit-msg", "lint"],
"devDependencies": {
"ava": "^0.14.0",
"eslint": "^2.9.0",
"eslint-config-graubnla": "^2.0.2",
"lodash.isobject": "^3.0.2"
"ava": "^0.15.2",
"eslint": "^3.0.0",
"eslint-config-graubnla": "^3.0.0",
"lodash.isobject": "^3.0.2",
"pre-commit": "^1.1.3"
},
"scripts": {
"test": "eslint SitemapGenerator.js && ava test/all.js"
"lint": "eslint SitemapGenerator.js",
"test": "npm run lint && ava test/all.js",
"precommit-msg": "echo 'Pre-commit checks...' && exit 0"
}
}

2

README.md

@@ -29,3 +29,3 @@ # Sitemap Generator

The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also the crawler will not fetch URL's from a page if the robots meta tag with the value `nofollow` is present. The crawler is able to apply the `base` value to found links.
The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present the `robots.txt` will be taken into account and possible rules are applied for each URL to consider if it should be added to the sitemap. Also the crawler will not fetch URL's from a page if the robots meta tag with the value `nofollow` is present and ignore them completely if `noindex` rule is present. The crawler is able to apply the `base` value to found links.

@@ -32,0 +32,0 @@ The protocol can be omitted, if the domain uses `http` or redirects to `https` are set up.

@@ -77,3 +77,8 @@ 'use strict';

// set initial port
this.crawler.initialPort = parseInt(this.options.port);
var port = parseInt(this.options.port);
// set port to 443 if https is present, respect user options
if (this.baseUrl.protocol === 'https:' && this.options.port === 80) {
port = 443;
}
this.crawler.initialPort = port;

@@ -114,2 +119,5 @@ // set initial path to subpage if provided

// array with urls that are crawled but shouldn't be indexed
this.crawler.noindex = [];
// custom discover function

@@ -174,2 +182,8 @@ this.crawler.discoverResources = this._discoverResources;

var metaRobots = $('meta[name="robots"]');
// add to noindex for it later to be removed from the store before a sitemap is built
if (metaRobots.length && /noindex/i.test(metaRobots.attr('content'))) {
this.noindex.push(queueItem.url);
}
if (metaRobots.length && /nofollow/i.test(metaRobots.attr('content'))) {

@@ -224,3 +238,15 @@ return [];

var sitemap = null;
if (this.store.found.length > 0) {
// Remove urls with a robots meta tag 'noindex' before building the sitemap
this.crawler.noindex.forEach(function (page) {
var index = this.store.found.indexOf(page);
if (index !== -1) {
// remove url from found array
var ignored = this.store.found.splice(index, 1)[0];
// add url to ignored url
this.store.ignored.push(ignored);
}
}, this);
// xml base

@@ -227,0 +253,0 @@ var xml = xmlbuilder.create('urlset', { version: '1.0', encoding: 'UTF-8' })

@@ -10,4 +10,6 @@ /* eslint no-unused-vars:0 */

// start testserver
test.before(function () {
server.listen(port, localhost);
test.cb.before(function (t) {
server.listen(port, localhost, function () {
t.end();
});
});

@@ -22,5 +24,7 @@

test.after(function () {
test.cb.after(function (t) {
// stop test server
server.close();
server.close(function () {
t.end();
});
});

@@ -41,2 +41,18 @@ /* eslint no-unused-vars:0 */

test.cb('should ignore pages with "noindex" rule', function (t) {
t.plan(2);
var generator = new SitemapGenerator(localhost, {
port: port,
});
generator.on('done', function (sitemap, store) {
t.is(store.found.indexOf(buildUrl(localhost, port, '/noindex')), -1);
t.not(store.ignored.indexOf(buildUrl(localhost, port, '/noindex')), -1);
t.end();
});
generator.start();
});
test.cb('should restrict subsequent requests to given path', function (t) {

@@ -43,0 +59,0 @@ t.plan(1);

@@ -10,2 +10,3 @@ /* eslint-disable */

'<a href="/single">Single</a>',
'<a href="/noindex">Noindex</a>',
].join('\n'));

@@ -145,2 +146,10 @@ res.end();

'/noindex': function (req, res) {
res.writeHead(200, { 'Content-Type': 'text/html' });
res.write([
'<meta name="robots" content="noindex,follow">',
].join('\n'));
res.end();
},
'/robots.txt': function (req, res) {

@@ -147,0 +156,0 @@ res.writeHead(200, { 'Content-Type': 'text/html' });

@@ -19,3 +19,3 @@ /* eslint no-unused-vars:0 */

// sitemap
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF-8"\?>/, 'has xml header');
t.regex(sitemap, /^<\?xml version="1.0" encoding="UTF\-8"\?>/, 'has xml header');
var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;

@@ -22,0 +22,0 @@ t.regex(sitemap, urlsRegex, 'has urlset property');

Sorry, the diff of this file is not supported yet

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc