sitemap-generator-cli
Comparing version 5.0.1 to 6.0.0
cli.js
@@ -9,12 +9,14 @@ #!/usr/bin/env node
var chalk = require('chalk');
var path = require('path');
var fs = require('fs');
program.version(pkg.version)
.usage('[options] <url>')
.usage('[options] <url> <filepath>')
.option('-b, --baseurl', 'only allow URLs which match given <url>')
.option('-d, --dry', 'show status messages without generating a sitemap')
.option('-q, --query', 'consider query string')
.option('-v, --verbose', 'print details when crawling')
.parse(process.argv);
// display help if no url provided
if (!program.args[0]) {
if (program.args.length < 2) {
program.help();
@@ -24,2 +26,7 @@ process.exit();
if (!/[a-zA-Z]\.xml$/.test(program.args[1])) {
console.error(chalk.red('Filepath should contain a filename ending with ".xml".'));
process.exit();
}
// create SitemapGenerator instance
@@ -32,3 +39,3 @@ var generator = new SitemapGenerator(program.args[0], {
// add event listeners to crawler if dry mode enabled
if (program.dry) {
if (program.verbose) {
// fetch status
@@ -56,5 +63,5 @@ generator.on('fetch', function (status, url) {
// crawling done
generator.on('done', function (sitemap, store) {
generator.on('done', function (sitemaps, store) {
// show stats if dry mode
if (program.dry) {
if (program.verbose) {
var message = 'Added %s pages, ignored %s pages, encountered %s errors.';
@@ -77,5 +84,18 @@ var stats = [
}
}
if (sitemaps !== null) {
// save files to disk
sitemaps.map(function write(map, index) {
var filePath = path.resolve(program.args[1]);
if (index !== 0) {
filePath = filePath.replace(/(\.xml)$/, '_part' + index + '$1');
}
return fs.writeFileSync(filePath, map, function (err) {
if (err) throw err;
});
});
} else {
// print sitemap
console.log(sitemap);
console.error(chalk.red('URL not found.'));
}
@@ -82,0 +102,0 @@
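For illustration only (a hypothetical invocation; the error text is the one introduced in the validation block above), passing a filepath that does not end in `.xml` now aborts before any crawling starts:

```BASH
$ sitemap-generator http://example.com some/path/sitemap.txt
Filepath should contain a filename ending with ".xml".
```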
{
"name": "sitemap-generator-cli",
"version": "5.0.1",
"version": "6.0.0",
"description": "Create xml sitemaps from the command line.",
@@ -32,3 +32,3 @@ "homepage": "https://github.com/lgraubner/sitemap-generator-cli",
"commander": "^2.9.0",
"sitemap-generator": "^5.0.1"
"sitemap-generator": "6.0.0"
},
@@ -44,4 +44,4 @@ "preferGlobal": true,
"devDependencies": {
"ava": "^0.17.0",
"eslint": "^3.13.1",
"ava": "^0.18.2",
"eslint": "^3.16.1",
"eslint-config-graubnla": "^3.0.0"
@@ -48,0 +48,0 @@ },
@@ -15,3 +15,3 @@ # Sitemap Generator CLI
```BASH
$ sitemap-generator [options] <url>
$ sitemap-generator [options] <url> <filepath>
```
@@ -21,6 +21,6 @@
When the crawler finished the XML Sitemap will be built and printed directly to your console. Pass the sitemap to save the sitemap as a file or do something else:
When the crawler has finished, the XML sitemap will be built and saved to the filepath you specified. If more than 50,000 pages were fetched, the output is split into several sitemap files and a sitemapindex file is created, as Google does not allow more than 50,000 items in one sitemap.
```BASH
$ sitemap-generator http://example.com > some/path/sitemap.xml
$ sitemap-generator http://example.com some/path/sitemap.xml
```
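As a hypothetical illustration (the number of files and their exact contents depend on the crawled site and on the sitemap-generator library), a crawl exceeding 50,000 pages written to `some/path/sitemap.xml` would leave additional `_part` files next to it, following the naming scheme in `cli.js` above:

```BASH
$ sitemap-generator http://example.com some/path/sitemap.xml
$ ls some/path
sitemap.xml
sitemap_part1.xml
sitemap_part2.xml
```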
@@ -32,11 +32,11 @@
Usage: sitemap-generator [options] <url>
Usage: cli [options] <url> <filepath>
Options:
-h, --help output usage information
-V, --version output the version number
-b, --baseurl only allow URLs which match given <url>
-d, --dry show status messages without generating a sitemap
-q, --query consider query string
-h, --help output usage information
-V, --version output the version number
-b, --baseurl only allow URLs which match given <url>
-q, --query consider query string
-v, --verbose print details when crawling
```
@@ -48,3 +48,3 @@
// strictly match given path and consider query string
$ sitemap-generator -bq example.com/foo/
$ sitemap-generator -bq example.com/foo/ sitemap.xml
```
@@ -58,13 +58,13 @@
### `--dry`
### `--query`
Default: `false`
Use this option to make a dry run and check the generation process to see which sites are fetched and if there are any errors.
Will not create a sitemap!
Consider URLs with query strings like `http://www.example.com/?foo=bar` as individual sites and add them to the sitemap.
### `--query`
### `--verbose`
Default: `false`
Consider URLs with query strings like `http://www.example.com/?foo=bar` as individual sites and add them to the sitemap.
Print debug messages during the crawling process. Also prints a summary when finished.
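As an illustrative example combining the options documented above, a crawl that keeps query-string URLs and prints progress details could be started like this:

```BASH
$ sitemap-generator --query --verbose http://example.com sitemap.xml
```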
/* eslint no-unused-vars:0 */
var test = require('ava');
var fs = require('fs');
var path = require('path');
var port = require('./lib/constants').port;
@@ -16,9 +19,8 @@ var baseUrl = require('./lib/constants').baseUrl;
test.cb('should return null for invalid URL\'s', function (t) {
t.plan(3);
test.cb('should return error message for invalid URL\'s', function (t) {
t.plan(2);
exec('node cli.js invalid', function (error, stdout, stderr) {
exec('node cli.js invalid sitemap.xml', function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '');
t.regex(stdout, /^null/);
t.not(stderr, '');
@@ -29,14 +31,8 @@ t.end();
test.cb('should return valid sitemap', function (t) {
t.plan(6);
test.cb('should return error message for missing/invalid filepath', function (t) {
t.plan(2);
exec('node cli.js ' + baseUrl + ':' + port, function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '', 'no error messages');
// sitemap
t.regex(stdout, /^<\?xml version="1.0" encoding="UTF-8"\?>/, 'has xml header');
var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;
t.regex(stdout, urlsRegex, 'has urlset property');
t.truthy(stdout.match(/<url>(.|\n)+?<\/url>/g), 'contains url properties');
t.truthy(stdout.match(/<loc>(.|\n)+?<\/loc>/g), 'contains loc properties');
t.not(stdout, '');
@@ -47,11 +43,35 @@ t.end();
test.cb('should return valid sitemap', function (t) {
t.plan(7);
exec('node cli.js ' + baseUrl + ':' + port + ' sitemap_valid.xml',
function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '', 'no error messages');
// sitemap
var filePath = path.resolve('./sitemap_valid.xml');
t.truthy(fs.existsSync(filePath));
t.regex(fs.readFileSync(filePath), /^<\?xml version="1.0" encoding="UTF-8"\?>/);
var urlsRegex = /<urlset xmlns=".+?">(.|\n)+<\/urlset>/;
t.regex(fs.readFileSync(filePath), urlsRegex, 'has urlset property');
t.regex(fs.readFileSync(filePath), /<url>(.|\n)+?<\/url>/g, 'contains url properties');
t.regex(fs.readFileSync(filePath), /<loc>(.|\n)+?<\/loc>/g, 'contains loc properties');
t.end();
}
);
});
test.cb('should restrict crawler to baseurl if option is enabled', function (t) {
t.plan(3);
t.plan(4);
// eslint-disable-next-line
exec('node cli.js ' + baseUrl + ':' + port + '/subpage --baseurl', function (error, stdout, stderr) {
exec('node cli.js --baseurl ' + baseUrl + ':' + port + '/subpage sitemap_baseurl.xml', function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '', 'no error messages');
var filePath = path.resolve('sitemap_baseurl.xml');
t.truthy(fs.existsSync(filePath));
var regex = new RegExp('http:\/\/' + baseUrl + ':' + port + '/<');
t.falsy(regex.test(stdout), 'index page is not included in sitemap');
t.falsy(regex.test(fs.readFileSync(filePath)), 'index page is not included in sitemap');
@@ -63,31 +83,25 @@ t.end();
test.cb('should include query strings if enabled', function (t) {
t.plan(5);
t.plan(4);
exec('node cli.js ' + baseUrl + ':' + port + ' --query', function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '', 'no error messages');
t.not(stdout, '', 'stdout is not empty');
t.regex(stdout, /[^<\?xml version="1.0" encoding="UTF\-8"\?>]/, 'does not print xml sitemap');
exec('node cli.js --query ' + baseUrl + ':' + port + ' sitemap_query.xml',
function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '', 'no error messages');
var filePath = path.resolve('sitemap_query.xml');
t.truthy(fs.existsSync(filePath));
var regex = new RegExp('/?querypage');
t.truthy(regex.test(stdout), 'query page included');
var regex = new RegExp('/?querypage');
t.truthy(regex.test(fs.readFileSync(filePath)), 'query page included');
t.end();
});
t.end();
}
);
});
test.cb('should log requests if dry mode is enabled', function (t) {
t.plan(4);
test.cb.after(function (t) {
// remove test sitemaps
fs.unlinkSync(path.resolve('sitemap_baseurl.xml'));
fs.unlinkSync(path.resolve('sitemap_query.xml'));
fs.unlinkSync(path.resolve('sitemap_valid.xml'));
exec('node cli.js ' + baseUrl + ':' + port + ' --dry', function (error, stdout, stderr) {
t.is(error, null, 'no error');
t.is(stderr, '', 'no error messages');
t.not(stdout, '', 'stdout is not empty');
t.regex(stdout, /[^<\?xml version="1.0" encoding="UTF\-8"\?>]/, 'does not print xml sitemap');
t.end();
});
});
test.cb.after(function (t) {
// stop test server
@@ -94,0 +108,0 @@ server.close(function () {
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package.
+ Added lodash.chunk@4.2.0 (transitive)
+ Added simplecrawler@1.0.5 (transitive)
+ Added sitemap-generator@6.0.0 (transitive)
- Removed simplecrawler@1.0.3 (transitive)
- Removed sitemap-generator@5.0.1 (transitive)
Updated sitemap-generator@6.0.0