
sitemap-generator-cli - npm package version comparison

Comparing version 5.0.1 to 6.0.0


cli.js

@@ -9,12 +9,14 @@ #!/usr/bin/env node

 var chalk = require('chalk');
+var path = require('path');
+var fs = require('fs');
 program.version(pkg.version)
-  .usage('[options] <url>')
+  .usage('[options] <url> <filepath>')
   .option('-b, --baseurl', 'only allow URLs which match given <url>')
-  .option('-d, --dry', 'show status messages without generating a sitemap')
   .option('-q, --query', 'consider query string')
+  .option('-v, --verbose', 'print details when crawling')
   .parse(process.argv);
 // display help if no url provided
-if (!program.args[0]) {
+if (program.args.length < 2) {
   program.help();

@@ -24,2 +26,7 @@ process.exit();

+if (!/[a-zA-Z]\.xml$/.test(program.args[1])) {
+  console.error(chalk.red('Filepath should contain a filename ending with ".xml".'));
+  process.exit();
+}
 // create SitemapGenerator instance
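The new guard above accepts only paths whose final segment ends in a letter followed by `.xml`. A standalone sketch of the same regex (illustrative only, not package code):

```js
// The pattern used by the new cli.js guard, tried on a few inputs.
var pattern = /[a-zA-Z]\.xml$/;

console.log(pattern.test('some/path/sitemap.xml')); // true: accepted
console.log(pattern.test('some/path/.xml'));        // false: no filename before ".xml"
console.log(pattern.test('report.json'));           // false: wrong extension
```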

@@ -32,3 +39,3 @@ var generator = new SitemapGenerator(program.args[0], {

 // add event listeners to crawler if dry mode enabled
-if (program.dry) {
+if (program.verbose) {
   // fetch status

@@ -56,5 +63,5 @@ generator.on('fetch', function (status, url) {

 // crawling done
-generator.on('done', function (sitemap, store) {
+generator.on('done', function (sitemaps, store) {
   // show stats if dry mode
-  if (program.dry) {
+  if (program.verbose) {
     var message = 'Added %s pages, ignored %s pages, encountered %s errors.';

@@ -77,5 +84,18 @@ var stats = [

   }
 }
+if (sitemaps !== null) {
+  // save files to disk
+  sitemaps.map(function write(map, index) {
+    var filePath = path.resolve(program.args[1]);
+    if (index !== 0) {
+      filePath = filePath.replace(/(\.xml)$/, '_part' + index + '$1');
+    }
+    return fs.writeFileSync(filePath, map, function (err) {
+      if (err) throw err;
+    });
+  });
+} else {
-  // print sitemap
-  console.log(sitemap);
+  console.error(chalk.red('URL not found.'));
 }
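The write loop above keeps the user-supplied path for the first sitemap and derives `_partN` names for the rest. A small sketch of that naming, using the same `replace()` call (illustrative; note also that `fs.writeFileSync` is synchronous and takes no completion callback, so the `err` handler shown in the diff is never invoked):

```js
// Reproduces the `_part` naming the diff above applies to split sitemaps.
var filePath = '/tmp/sitemap.xml';

[0, 1, 2].forEach(function (index) {
  var target = filePath;
  if (index !== 0) {
    target = target.replace(/(\.xml)$/, '_part' + index + '$1');
  }
  console.log(target);
});
// -> /tmp/sitemap.xml
// -> /tmp/sitemap_part1.xml
// -> /tmp/sitemap_part2.xml
```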

@@ -82,0 +102,0 @@

package.json

 {
 "name": "sitemap-generator-cli",
-"version": "5.0.1",
+"version": "6.0.0",
 "description": "Create xml sitemaps from the command line.",

@@ -32,3 +32,3 @@ "homepage": "https://github.com/lgraubner/sitemap-generator-cli",

"commander": "^2.9.0",
"sitemap-generator": "^5.0.1"
"sitemap-generator": "6.0.0"
},

@@ -44,4 +44,4 @@ "preferGlobal": true,

"devDependencies": {
"ava": "^0.17.0",
"eslint": "^3.13.1",
"ava": "^0.18.2",
"eslint": "^3.16.1",
"eslint-config-graubnla": "^3.0.0"

@@ -48,0 +48,0 @@ },

README.md

@@ -15,3 +15,3 @@ # Sitemap Generator CLI

 ```BASH
-$ sitemap-generator [options] <url>
+$ sitemap-generator [options] <url> <filepath>
 ```

@@ -21,6 +21,6 @@

-When the crawler finished the XML Sitemap will be built and printed directly to your console. Pass the sitemap to save the sitemap as a file or do something else:
+When the crawler finished the XML Sitemap will be built and saved to your specified filepath. If the count of fetched pages is greater than 50000 it will be splitted into several sitemap files and create a sitemapindex file. Google does not allow more than 50000 items in one sitemap.
 ```BASH
-$ sitemap-generator http://example.com > some/path/sitemap.xml
+$ sitemap-generator http://example.com some/path/sitemap.xml
 ```
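To make the 50000-URL cap concrete: per the README text above, a crawl that exceeds the cap yields several sitemap parts plus one sitemapindex file. A rough sketch of the resulting file count (the helper is illustrative, not package code):

```js
// Files produced for a crawl of `pageCount` URLs under a 50000-URL cap.
var LIMIT = 50000;

function filesFor(pageCount) {
  var parts = Math.ceil(pageCount / LIMIT);
  // one sitemap file, or N part files plus one sitemapindex
  return parts > 1 ? parts + 1 : 1;
}

console.log(filesFor(1200));   // 1
console.log(filesFor(120000)); // 4 (three parts plus one index)
```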

@@ -32,11 +32,11 @@

-Usage: sitemap-generator [options] <url>
+Usage: cli [options] <url> <filepath>
 Options:
-  -h, --help     output usage information
-  -V, --version  output the version number
-  -b, --baseurl  only allow URLs which match given <url>
-  -d, --dry      show status messages without generating a sitemap
-  -q, --query    consider query string
+  -h, --help     output usage information
+  -V, --version  output the version number
+  -b, --baseurl  only allow URLs which match given <url>
+  -q, --query    consider query string
+  -v, --verbose  print details when crawling
```

@@ -48,3 +48,3 @@

 // strictly match given path and consider query string
-$ sitemap-generator -bq example.com/foo/
+$ sitemap-generator -bq example.com/foo/ sitemap.xml
```

@@ -58,13 +58,13 @@

-### `--dry`
+### `--query`
 Default: `false`
-Use this option to make a dry run and check the generation process to see which sites are fetched and if there are any errors.
-Will not create a sitemap!
+Consider URLs with query strings like `http://www.example.com/?foo=bar` as indiviual sites and add them to the sitemap.
-### `--query`
+### `--verbose`
 Default: `false`
-Consider URLs with query strings like `http://www.example.com/?foo=bar` as indiviual sites and add them to the sitemap.
+Print debug messages during crawling process. Also prints out a summery when finished.
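A quick illustration of what `--query` controls: by default, URLs differing only in their query string collapse to a single entry, while the flag keeps them separate. The `normalize` helper below is hypothetical, sketched only to show the distinction:

```js
// Hypothetical normalization step: drop the query string unless --query is set.
function normalize(url, considerQuery) {
  return considerQuery ? url : url.split('?')[0];
}

console.log(normalize('http://www.example.com/?foo=bar', false)); // http://www.example.com/
console.log(normalize('http://www.example.com/?foo=bar', true));  // http://www.example.com/?foo=bar
```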
 /* eslint no-unused-vars:0 */
 var test = require('ava');
+var fs = require('fs');
+var path = require('path');
 var port = require('./lib/constants').port;

@@ -16,9 +19,8 @@ var baseUrl = require('./lib/constants').baseUrl;

-test.cb('should return null for invalid URL\'s', function (t) {
-  t.plan(3);
+test.cb('should return error message for invalid URL\'s', function (t) {
+  t.plan(2);
-  exec('node cli.js invalid', function (error, stdout, stderr) {
+  exec('node cli.js invalid sitemap.xml', function (error, stdout, stderr) {
     t.is(error, null, 'no error');
-    t.is(stderr, '');
-    t.regex(stdout, /^null/);
+    t.not(stderr, '');

@@ -29,14 +31,8 @@ t.end();

-test.cb('should return valid sitemap', function (t) {
-  t.plan(6);
+test.cb('should return error message for missing/invalid filepath', function (t) {
+  t.plan(2);
   exec('node cli.js ' + baseUrl + ':' + port, function (error, stdout, stderr) {
     t.is(error, null, 'no error');
-    t.is(stderr, '', 'no error messages');
-    // sitemap
-    t.regex(stdout, /^<\?xml version="1.0" encoding="UTF-8"\?>/, 'has xml header');
-    var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;
-    t.regex(stdout, urlsRegex, 'has urlset property');
-    t.truthy(stdout.match(/<url>(.|\n)+?<\/url>/g), 'contains url properties');
-    t.truthy(stdout.match(/<loc>(.|\n)+?<\/loc>/g), 'contains loc properties');
+    t.not(stdout, '');

@@ -47,11 +43,35 @@ t.end();

+test.cb('should return valid sitemap', function (t) {
+  t.plan(7);
+  exec('node cli.js ' + baseUrl + ':' + port + ' sitemap_valid.xml',
+    function (error, stdout, stderr) {
+      t.is(error, null, 'no error');
+      t.is(stderr, '', 'no error messages');
+      // sitemap
+      var filePath = path.resolve('./sitemap_valid.xml');
+      t.truthy(fs.existsSync(filePath));
+      t.regex(fs.readFileSync(filePath), /^<\?xml version="1.0" encoding="UTF-8"\?>/);
+      var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;
+      t.regex(fs.readFileSync(filePath), urlsRegex, 'has urlset property');
+      t.regex(fs.readFileSync(filePath), /<url>(.|\n)+?<\/url>/g, 'contains url properties');
+      t.regex(fs.readFileSync(filePath), /<loc>(.|\n)+?<\/loc>/g, 'contains loc properties');
+      t.end();
+    }
+  );
+});
 test.cb('should restrict crawler to baseurl if option is enabled', function (t) {
-  t.plan(3);
+  t.plan(4);
+  // eslint-disable-next-line
-  exec('node cli.js ' + baseUrl + ':' + port + '/subpage --baseurl', function (error, stdout, stderr) {
+  exec('node cli.js --baseurl ' + baseUrl + ':' + port + '/subpage sitemap_baseurl.xml', function (error, stdout, stderr) {
     t.is(error, null, 'no error');
     t.is(stderr, '', 'no error messages');
+    var filePath = path.resolve('sitemap_baseurl.xml');
+    t.truthy(fs.existsSync(filePath));
     var regex = new RegExp('http:\/\/' + baseUrl + ':' + port + '/<');
-    t.falsy(regex.test(stdout), 'index page is not included in sitemap');
+    t.falsy(regex.test(fs.readFileSync(filePath)), 'index page is not included in sitemap');

@@ -63,31 +83,25 @@ t.end();

 test.cb('should include query strings if enabled', function (t) {
-  t.plan(5);
+  t.plan(4);
-  exec('node cli.js ' + baseUrl + ':' + port + ' --query', function (error, stdout, stderr) {
-    t.is(error, null, 'no error');
-    t.is(stderr, '', 'no error messages');
-    t.not(stdout, '', 'stdout is not empty');
-    t.regex(stdout, /[^<\?xml version="1.0" encoding="UTF\-8"\?>]/, 'does not print xml sitemap');
+  exec('node cli.js --query ' + baseUrl + ':' + port + ' sitemap_query.xml',
+    function (error, stdout, stderr) {
+      t.is(error, null, 'no error');
+      t.is(stderr, '', 'no error messages');
+      var filePath = path.resolve('sitemap_query.xml');
+      t.truthy(fs.existsSync(filePath));
-    var regex = new RegExp('/?querypage');
-    t.truthy(regex.test(stdout), 'query page included');
+      var regex = new RegExp('/?querypage');
+      t.truthy(regex.test(fs.readFileSync(filePath)), 'query page included');
-    t.end();
-  });
+      t.end();
+    }
+  );
 });
-test.cb('should log requests if dry mode is enabled', function (t) {
-  t.plan(4);
+test.cb.after(function (t) {
+  // remove test sitemaps
+  fs.unlinkSync(path.resolve('sitemap_baseurl.xml'));
+  fs.unlinkSync(path.resolve('sitemap_query.xml'));
+  fs.unlinkSync(path.resolve('sitemap_valid.xml'));
-  exec('node cli.js ' + baseUrl + ':' + port + ' --dry', function (error, stdout, stderr) {
-    t.is(error, null, 'no error');
-    t.is(stderr, '', 'no error messages');
-    t.not(stdout, '', 'stdout is not empty');
-    t.regex(stdout, /[^<\?xml version="1.0" encoding="UTF\-8"\?>]/, 'does not print xml sitemap');
   t.end();
 });
-});
 test.cb.after(function (t) {
   // stop test server

@@ -94,0 +108,0 @@ server.close(function () {

