New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

sitemap-generator

Package Overview
Dependencies
Maintainers
1
Versions
61
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sitemap-generator - npm Package Compare versions

Comparing version 3.0.2 to 3.1.0

.eslintrc.json

233

lib/SitemapGenerator.js

@@ -1,28 +0,14 @@

#!/usr/bin/env node
'use strict';
"use strict";
var Crawler = require('simplecrawler');
var _ = require('lodash');
var fs = require('fs');
var builder = require('xmlbuilder');
var program = require('commander');
var chalk = require('chalk');
var path = require('path');
var URL = require('url-parse');
var robotsParser = require('robots-parser');
var request = require('request');
const Crawler = require("simplecrawler");
const _ = require("lodash");
const fs = require("fs");
const builder = require("xmlbuilder");
const program = require("commander");
const chalk = require("chalk");
const path = require("path");
const URL = require("url-parse");
const robotsParser = require("robots-parser");
const request = require("request");
const pkg = require("../package.json");
program.version(pkg.version)
.usage("[options] <url>")
.option("-q, --query", "consider query string")
.option("-f, --filename [filename]", "sets output filename")
.option("-p, --path [path]", "specifies output path")
.parse(process.argv);
if (!program.args[0]) {
program.help();
}
/**

@@ -33,99 +19,97 @@ * Generator object, handling the crawler and file generation.

*/
var SitemapGenerator = function(url) {
this.chunk = [];
function SitemapGenerator(url) {
var port = 80;
var exclude = ['gif', 'jpg', 'jpeg', 'png', 'ico', 'bmp', 'ogg', 'webp',
'mp4', 'webm', 'mp3', 'ttf', 'woff', 'json', 'rss', 'atom', 'gz', 'zip',
'rar', '7z', 'css', 'js', 'gzip', 'exe'];
var exts = exclude.join('|');
var regex = new RegExp('\.(' + exts + ')', 'i');
this.uri = new URL(url);
this.crawler = new Crawler(this.uri.host);
this.chunk = [];
this.crawler.initialPath = "/";
this.uri = new URL(url);
this.crawler = new Crawler(this.uri.host);
var port = 80;
if (process.env.NODE_ENV === "development") {
port = 8000;
}
this.crawler.initialPort = port;
this.crawler.initialPath = '/';
if (process.env.NODE_ENV === 'development') {
port = 8000;
}
this.crawler.initialPort = port;
if (!this.uri.protocol) {
this.uri.set("protocol", "http:");
}
if (!this.uri.protocol) {
this.uri.set('protocol', 'http:');
}
this.crawler.initialProtocol = this.uri.protocol.replace(":", "");
this.crawler.userAgent = "Node/Sitemap-Generator";
this.crawler.initialProtocol = this.uri.protocol.replace(':', '');
this.crawler.userAgent = 'Node/Sitemap-Generator';
if (!program.query) {
this.crawler.stripQuerystring = true;
}
if (!program.query) {
this.crawler.stripQuerystring = true;
}
var exclude = ["gif", "jpg", "jpeg", "png", "ico", "bmp", "ogg", "webp", "mp4", "webm", "mp3", "ttf", "woff", "json", "rss", "atom", "gz", "zip", "rar", "7z", "css", "js", "gzip", "exe"];
this.crawler.addFetchCondition(function (parsedURL) {
return !parsedURL.path.match(regex);
});
}
var exts = exclude.join("|");
var regex = new RegExp("\.(" + exts + ")", "i");
this.crawler.addFetchCondition(function(parsedURL) {
return !parsedURL.path.match(regex);
});
request(this.uri.set("pathname", "/robots.txt").toString(), (error, response, body) => {
if (!error && response.statusCode == 200) {
this.robots = robotsParser(response.request.uri.href, body);
}
this.create();
});
};
/**
* Create the crawler instance.
*/
SitemapGenerator.prototype.create = function() {
SitemapGenerator.prototype.start = function () {
this.crawler.on('fetchcomplete', function (item) {
var allowed = true;
this.crawler.on("fetchcomplete", (item) => {
var allowed = true;
if (this.robots) {
try {
allowed = this.robots.isAllowed(item.url, this.crawler.userAgent);
} catch (e) {
// silent error
}
}
if (this.robots) {
try {
allowed = this.robots.isAllowed(item.url, this.crawler.userAgent);
} catch (e) {
// silent error
}
}
if (allowed) {
this.chunk.push({
loc: item.url,
});
if (allowed) {
this.chunk.push({
loc: item.url
});
console.log(chalk.cyan.bold('Found:'), chalk.gray(item.url));
} else {
console.log(chalk.bold.magenta('Ignored:'), chalk.gray(item.url));
}
}.bind(this));
console.log(chalk.cyan.bold("Found:"), chalk.gray(item.url));
} else {
console.log(chalk.bold.magenta("Ignored:"), chalk.gray(item.url));
}
});
this.crawler.on('fetch404', function (item) {
console.log(chalk.red.bold('Not found:'), chalk.gray(item.url));
});
this.crawler.on("fetch404", function(item, response) {
console.log(chalk.red.bold("Not found:"), chalk.gray(item.url));
});
this.crawler.on('fetcherror', function (item) {
console.log(chalk.red.bold('Fetch error:'), chalk.gray(item.url));
});
this.crawler.on("fetcherror", function(item, response) {
console.log(chalk.red.bold("Fetch error:"), chalk.gray(item.url));
});
this.crawler.on('complete', function () {
if (_.isEmpty(this.chunk)) {
console.error(chalk.red.bold('Error: Site "%s" could not be found.'), program.args[0]);
process.exit(1);
}
this.crawler.on("complete", () => {
if (_.isEmpty(this.chunk)) {
console.error(chalk.red.bold("Error: Site '%s' could not be found."), program.args[0]);
process.exit(1);
}
this.write(function (err) {
if (err) {
console.error(chalk.red.bold(err));
process.exit(1);
} else {
console.log(chalk.white('Added %s sites, encountered %s errors.'),
this.chunk.length, this.crawler.queue.errors());
console.log(chalk.green.bold('Sitemap successfully created!'));
process.exit();
}
}.bind(this));
}.bind(this));
this.write((err, path) => {
if (err) {
console.error(chalk.red.bold(err));
process.exit(1);
} else {
console.log(chalk.white("Added %s sites, encountered %s errors."), this.chunk.length, this.crawler.queue.errors());
console.log(chalk.green.bold("Sitemap successfully created!"));
process.exit();
}
});
});
request(this.uri.set('pathname', '/robots.txt').toString(), function (error, response, body) {
if (!error && response.statusCode === 200) {
self.robots = robotsParser(response.request.uri.href, body);
}
this.crawler.start();
}.bind(this));
};

@@ -138,31 +122,32 @@

*/
SitemapGenerator.prototype.write = function(callback) {
var xml = builder.create("urlset", { version: "1.0", encoding: "UTF-8" })
.att("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9");
SitemapGenerator.prototype.write = function (callback) {
var sitemap;
var outputPath = '.';
var fileName = 'sitemap';
var xml = builder.create('urlset', { version: '1.0', encoding: 'UTF-8' })
.att('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
_.forIn(this.chunk, function(value, key) {
xml.ele("url")
.ele(value);
});
_.forIn(this.chunk, function (value) {
xml.ele('url')
.ele(value);
});
var sitemap = xml.end({ pretty: true, indent: ' ', newline: "\n" });
sitemap = xml.end({ pretty: true, indent: ' ', newline: '\n' });
var outputPath = ".";
if (program.path) {
outputPath = program.path.replace(/\/+$/, "");
}
if (program.path) {
outputPath = program.path.replace(/\/+$/, '');
}
var fileName = "sitemap";
if (program.filename) {
fileName = program.filename.replace(/\.xml$/i, "");
if (program.filename) {
fileName = program.filename.replace(/\.xml$/i, '');
}
outputPath = path.join(outputPath, fileName + '.xml');
fs.writeFile(outputPath, sitemap, function (err) {
if (typeof callback === 'function') {
return callback(err, outputPath);
}
outputPath = path.join(outputPath, fileName + ".xml");
fs.writeFile(outputPath, sitemap, function(err) {
if (typeof callback === "function") {
return callback(err, outputPath);
}
});
});
};
var generator = new SitemapGenerator(program.args[0]);
module.exports = SitemapGenerator;
{
"name": "sitemap-generator",
"version": "3.0.2",
"version": "3.1.0",
"description": "Create xml sitemaps from the command line.",

@@ -19,3 +19,3 @@ "homepage": "https://github.com/lgraubner/node-sitemap-generator",

],
"main": "lib/SitemapGenerator.js",
"main": "index.js",
"repository": {

@@ -31,3 +31,3 @@ "type": "git",

"lodash": "^3.10.1",
"xmlbuilder": "^4.1.0",
"xmlbuilder": "^4.2.0",
"commander": "^2.9.0",

@@ -41,6 +41,6 @@ "chalk": "^1.1.1",

"engines": {
"node": ">=4.0"
"node": ">=0.12"
},
"bin": {
"sitemap-generator": "./lib/SitemapGenerator.js"
"sitemap-generator": "index.js"
},

@@ -50,7 +50,9 @@ "license": "MIT",

"chai": "^3.4.1",
"eslint": "^1.10.3",
"eslint-config-airbnb": "^3.0.0",
"mocha": "^2.3.4"
},
"scripts": {
"test": "NODE_ENV=development mocha test"
"test": "eslint index.js lib/** && NODE_ENV=development mocha test"
}
}

@@ -1,202 +0,178 @@

var should = require("chai").should();
var exec = require("child_process").exec;
var fs = require("fs");
/* globals it:false, before:false, describe:false */
/* eslint no-unused-expressions: 0 */
/* eslint-env node, mocha */
require("./lib/testserver.js");
var should = require('chai').should();
var exec = require('child_process').exec;
var fs = require('fs');
describe("$ sitemap-generator invalid", function() {
var _error;
var _stdout;
var _stderr;
require('./lib/testserver.js');
before(function(done) {
fs.stat("./sitemap.xml", function(err, stats) {
if (err && err.code !== "ENOENT") {
fs.unlink("./sitemap.xml");
}
});
var cmd = exec("node ./lib/SitemapGenerator.js illegal", function(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
});
describe('$ sitemap-generator invalid', function () {
var _error;
var _stderr;
it("should fail because of invalid url", function() {
_stderr.should.not.be.empty;
before(function (done) {
fs.stat('./sitemap.xml', function (err) {
if (err && err.code !== 'ENOENT') {
fs.unlink('./sitemap.xml');
}
});
it("should exit with error code '1'", function() {
_error.code.should.equal(1);
exec('node ./index.js illegal', function cmd(error, stdout, stderr) {
_error = error;
_stderr = stderr;
done();
});
});
it("should not create an xml file", function(done) {
fs.stat("./sitemap.xml", function(err, stats) {
err.code.should.equal("ENOENT");
done();
});
it('should fail because of invalid url', function () {
_stderr.should.not.be.empty;
});
it('should exit with error code "1"', function () {
_error.code.should.equal(1);
});
it('should not create an xml file', function (done) {
fs.stat('./sitemap.xml', function (err) {
err.code.should.equal('ENOENT');
done();
});
});
});
describe("$ sitemap-generator 127.0.0.1", function() {
this.timeout(10000);
describe('$ sitemap-generator 127.0.0.1', function () {
var _error;
var _stdout;
var _stderr;
this.timeout(10000);
var _error;
var _stdout;
var _stderr;
after(function () {
fs.unlink('./sitemap.xml');
});
after(function() {
fs.unlink("./sitemap.xml");
before(function (done) {
exec('node ./index.js 127.0.0.1', function cmd(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
});
before(function(done) {
var cmd = exec("node ./lib/SitemapGenerator.js 127.0.0.1", function(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
});
it('should not throw any errors', function () {
_stderr.should.be.empty;
should.equal(_error, null);
});
it("should not throw any errors", function() {
_stderr.should.be.empty;
should.equal(_error, null);
});
it('should return success message', function () {
_stdout.should.not.be.empty;
});
it("should return success message", function() {
_stdout.should.not.be.empty;
it('should create an xml file', function (done) {
fs.stat('./sitemap.xml', function (err) {
should.equal(err, null);
done();
});
});
it("should create an xml file", function(done) {
fs.stat("./sitemap.xml", function(err, stats) {
should.equal(err, null);
done();
});
it('should contain xml markup', function (done) {
fs.readFile('./sitemap.xml', function (err, data) {
var content = data.toString();
content.should.contain('<?xml version="1.0" encoding="UTF-8"?>');
content.should.match(/<url>(\s|\S)*?<loc>\S+?<\/loc>(\s|\S)*?<\/url>/);
done();
});
});
it("should contain xml markup", function(done) {
fs.readFile("./sitemap.xml", function(err, data) {
var content = data.toString();
content.should.contain('<?xml version="1.0" encoding="UTF-8"?>');
content.should.match(/<url>(\s|\S)*?<loc>\S+?<\/loc>(\s|\S)*?<\/url>/);
done();
});
it('should take robots.txt into account', function (done) {
fs.readFile('./sitemap.xml', function (err, data) {
data.toString().should.not.contain('127.0.0.1/ignore');
done();
});
it("should take robots.txt into account", function(done) {
fs.readFile("./sitemap.xml", function(err, data) {
data.toString().should.not.contain("127.0.0.1/ignore");
done();
});
});
});
});
describe("$ sitemap-generator http://127.0.0.1/foo/bar", function() {
describe('$ sitemap-generator http://127.0.0.1/foo/bar', function () {
var _error;
var _stderr;
var _error;
var _stdout;
var _stderr;
after(function () {
fs.unlink('./sitemap.xml');
});
after(function() {
fs.unlink("./sitemap.xml");
before(function (done) {
exec('node ./index.js http://127.0.0.1', function cmd(error, stdout, stderr) {
_error = error;
_stderr = stderr;
done();
});
});
before(function(done) {
var cmd = exec("node ./lib/SitemapGenerator.js http://127.0.0.1", function(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
});
it("should ignore protocol and path", function() {
_stderr.should.be.empty;
should.equal(_error, null);
});
it('should ignore protocol and path', function () {
_stderr.should.be.empty;
should.equal(_error, null);
});
});
describe("$ sitemap-generator --filename=test 127.0.0.1", function() {
describe('$ sitemap-generator --filename=test 127.0.0.1', function () {
this.timeout(10000);
var _error;
var _stdout;
var _stderr;
after(function () {
fs.unlink('./test.xml');
});
after(function() {
fs.unlink("./test.xml");
before(function (done) {
exec('node ./index.js --filename=test 127.0.0.1', function () {
done();
});
});
before(function(done) {
var cmd = exec("node ./lib/SitemapGenerator.js --filename=test 127.0.0.1", function(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
it('should create an xml file with the correct name', function (done) {
fs.stat('./test.xml', function (err) {
should.equal(err, null);
done();
});
it("should create an xml file with the correct name", function(done) {
fs.stat("./test.xml", function(err, stats) {
should.equal(err, null);
done();
});
});
});
});
describe("$ sitemap-generator --query 127.0.0.1", function() {
describe('$ sitemap-generator --query 127.0.0.1', function () {
after(function () {
fs.unlink('./sitemap.xml');
});
var _error;
var _stdout;
var _stderr;
after(function() {
fs.unlink("./sitemap.xml");
before(function (done) {
exec('node ./index.js --query 127.0.0.1', function cmd() {
done();
});
});
before(function(done) {
var cmd = exec("node ./lib/SitemapGenerator.js --query 127.0.0.1", function(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
it('should include links with query parameters', function (done) {
fs.readFile('./sitemap.xml', function (err, data) {
data.toString().should.contain('/site/?foo=bar');
done();
});
it("should include links with query parameters", function(done) {
fs.readFile("./sitemap.xml", function(err, data) {
data.toString().should.contain("/site/?foo=bar");
done();
});
});
});
});
describe("$ sitemap-generator --path=./tmp 127.0.0.1", function() {
describe('$ sitemap-generator --path=./tmp 127.0.0.1', function () {
after(function () {
fs.unlink('./tmp/sitemap.xml');
fs.rmdir('./tmp');
});
var _error;
var _stdout;
var _stderr;
before(function (done) {
fs.mkdir('./tmp');
after(function() {
fs.unlink("./tmp/sitemap.xml");
fs.rmdir("./tmp");
exec('node ./index.js --path=./tmp 127.0.0.1', function cmd() {
done();
});
});
before(function(done) {
fs.mkdir("./tmp");
var cmd = exec("node ./lib/SitemapGenerator.js --path=./tmp 127.0.0.1", function(error, stdout, stderr) {
_error = error;
_stdout = stdout;
_stderr = stderr;
done();
});
it('should create xml file in given path', function (done) {
fs.stat('./tmp/sitemap.xml', function (err) {
should.equal(err, null);
done();
});
it("should create xml file in given path", function(done) {
fs.stat("./tmp/sitemap.xml", function(err, stats) {
should.equal(err, null);
done();
});
});
});
});

@@ -1,64 +0,64 @@

var http = require("http");
var http = require('http');
module.exports = {
"/": function(req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
"Content-Type": "text/html"
});
res.write("<a href=\"/site\">Link 1</a><a href=\"/ignore\">Link 2</a>");
res.end();
},
'/': function (req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
'Content-Type': 'text/html',
});
res.write('<a href=\'/site\'>Link 1</a><a href=\'/ignore\'>Link 2</a>');
res.end();
},
"/ignore": function(req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
"Content-Type": "text/html"
});
res.write("this should be ignored!");
res.end();
},
'/ignore': function (req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
'Content-Type': 'text/html',
});
res.write('this should be ignored!');
res.end();
},
"/site": function(req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
"Content-Type": "text/html"
});
res.write("<a href=\"/site/2\">Link 2</a>");
res.end();
},
'/site': function (req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
'Content-Type': 'text/html',
});
res.write('<a href=\'/site/2\'>Link 2</a>');
res.end();
},
"/site/2": function(req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
"Content-Type": "text/html"
});
res.write("<a href=\"/site/?foo=bar\"");
res.end();
},
'/site/2': function (req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
'Content-Type': 'text/html',
});
res.write('<a href=\'/site/?foo=bar\'');
res.end();
},
"/site/?foo=bar": function(req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
"Content-Type": "text/html"
});
res.write("query");
res.end();
},
'/site/?foo=bar': function (req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
'Content-Type': 'text/html',
});
res.write('query');
res.end();
},
"/robots.txt": function(req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
"Content-Type": "text/plain"
});
'/robots.txt': function (req, res) {
res.writeHead(
200,
http.STATUS_CODES[200], {
'Content-Type': 'text/plain',
});
res.write("User-agent: *\nDisallow: /ignore");
res.end();
}
res.write('User-agent: *\nDisallow: /ignore');
res.end();
},
};
/**
* Simple testserver.
*/
var http = require("http");
var routes = require("./routes");
var http = require('http');
var routes = require('./routes');
var server = http.createServer(function(req, res) {
if (routes[req.url] && typeof routes[req.url] == "function") {
routes[req.url](req, res);
} else {
res.writeHead(404, http.STATUS_CODES[404]);
res.write("Page not found.");
res.end();
}
}).listen(8000, "127.0.0.1");
http.createServer(function server(req, res) {
if (routes[req.url] && typeof routes[req.url] === 'function') {
routes[req.url](req, res);
} else {
res.writeHead(404, http.STATUS_CODES[404]);
res.write('Page not found.');
res.end();
}
}).listen(8000, '127.0.0.1');

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc