sitemap-generator
Comparing version 3.1.1 to 4.0.0
.eslintrc

{
  "extends": "airbnb/base",
  "extends": "graubnla/legacy",
  "rules": {
    "no-console": 0,
    "no-var": 0,
    "func-names": 0,
    "object-shorthand": 0
    "vars-on-top": 0
  }
}
package.json

{
  "name": "sitemap-generator",
  "version": "3.1.1",
  "description": "Create xml sitemaps from the command line.",
  "homepage": "https://github.com/lgraubner/node-sitemap-generator",
  "version": "4.0.0",
  "description": "Easily create XML sitemaps for your website.",
  "homepage": "https://github.com/lgraubner/sitemap-generator",
  "author": {
@@ -14,2 +14,3 @@ "name": "Lars Graubner",
    "xml",
    "sitemap.xml",
    "generator",
@@ -21,37 +22,31 @@ "crawler",
  ],
  "main": "cli.js",
  "main": "SitemapGenerator.js",
  "repository": {
    "type": "git",
    "url": "https://github.com/lgraubner/node-sitemap-generator.git"
    "url": "https://github.com/lgraubner/sitemap-generator.git"
  },
  "bugs": {
    "url": "https://github.com/lgraubner/node-sitemap-generator/issues"
    "url": "https://github.com/lgraubner/sitemap-generator/issues"
  },
  "dependencies": {
    "simplecrawler": "^0.5.4",
    "lodash": "^3.10.1",
    "xmlbuilder": "^4.2.0",
    "commander": "^2.9.0",
    "chalk": "^1.1.1",
    "url-parse": "^1.0.5",
    "robots-parser": "^1.0.0",
    "request": "^2.67.0"
    "cheerio": "^0.20.0",
    "lodash.assign": "^4.0.8",
    "lodash.forin": "^4.1.0",
    "robots": "^0.9.4",
    "simplecrawler": "^0.7.0",
    "xmlbuilder": "^8.2.2"
  },
  "preferGlobal": true,
  "engines": {
    "node": ">=0.12"
  },
  "bin": {
    "sitemap-generator": "cli.js"
  },
  "license": "MIT",
  "devDependencies": {
    "chai": "^3.4.1",
    "eslint": "^1.10.3",
    "eslint-config-airbnb": "^3.1.0",
    "mocha": "^2.3.4"
    "ava": "^0.14.0",
    "eslint": "^2.9.0",
    "eslint-config-graubnla": "^2.0.2",
    "lodash.isobject": "^3.0.2"
  },
  "scripts": {
    "test": "eslint index.js lib/** && NODE_ENV=development mocha test"
    "test": "eslint SitemapGenerator.js && ava test/all.js"
  }
}
README.md
@@ -1,67 +0,108 @@

# Node Sitemap Generator

# Sitemap Generator

[![Travis](https://img.shields.io/travis/lgraubner/node-sitemap-generator.svg)](https://travis-ci.org/lgraubner/node-sitemap-generator) [![David](https://img.shields.io/david/lgraubner/node-sitemap-generator.svg)](https://david-dm.org/lgraubner/node-sitemap-generator) [![David Dev](https://img.shields.io/david/dev/lgraubner/node-sitemap-generator.svg)](https://david-dm.org/lgraubner/node-sitemap-generator#info=devDependencies) [![npm](https://img.shields.io/npm/v/sitemap-generator.svg)](https://www.npmjs.com/package/sitemap-generator)

[![Travis](https://img.shields.io/travis/lgraubner/sitemap-generator.svg)](https://travis-ci.org/lgraubner/sitemap-generator) [![David](https://img.shields.io/david/lgraubner/sitemap-generator.svg)](https://david-dm.org/lgraubner/sitemap-generator) [![David Dev](https://img.shields.io/david/dev/lgraubner/sitemap-generator.svg)](https://david-dm.org/lgraubner/sitemap-generator#info=devDependencies) [![npm](https://img.shields.io/npm/v/sitemap-generator.svg)](https://www.npmjs.com/package/sitemap-generator)

> Create xml sitemaps from the command line.

> Easily create XML sitemaps for your website.

![sitemap-generator](sitemap-generator.gif)

## Installation

```BASH
$ npm install -g sitemap-generator
$ npm install -S sitemap-generator
```

## Usage

```BASH
$ sitemap-generator [options] <url>
```

```JavaScript
var SitemapGenerator = require('sitemap-generator');

// create generator
var generator = new SitemapGenerator('example.com');

// register event listeners
generator.on('done', function (sitemap) {
  console.log(sitemap); // => prints xml sitemap
});

// start the crawler
generator.start();
```
The crawler will fetch all sites matching folder URLs and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` is taken into account, and its rules are applied to each URL to decide whether it should be added to the sitemap.

The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` is taken into account, and its rules are applied to each URL to decide whether it should be added to the sitemap. The crawler also does not fetch URLs from a page if a robots meta tag with the value `nofollow` is present. The crawler is able to apply the `base` value to found links.

***Tip***: Omit the URL protocol; the crawler will detect the right one.

The protocol can be omitted if the domain uses `http` or a redirect to `https` is set up.

**Important**: Running sitemap-generator on sites that use the HTML `base` tag will not work in most cases, as the tag is not parsed by the crawler.
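Version 4.0.0 adds `cheerio` to the dependencies (see the package.json diff above), presumably to parse markup for the robots meta tag and the `base` href. A simplified sketch of such a nofollow check, not the package's actual implementation:

```JavaScript
var cheerio = require('cheerio');

// Returns false when a robots meta tag forbids following links on the page.
function allowsFollowing(html) {
  var $ = cheerio.load(html);
  var content = $('meta[name="robots"]').attr('content') || '';
  return content.toLowerCase().indexOf('nofollow') === -1;
}

console.log(allowsFollowing('<meta name="robots" content="index,nofollow">')); // => false
console.log(allowsFollowing('<p>no robots meta tag</p>')); // => true
```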
## Options

```BASH
$ sitemap-generator --help

Usage: sitemap-generator [options] <url>

Options:

-h, --help                 output usage information
-V, --version              output the version number
-q, --query                consider query string
-f, --filename [filename]  sets output filename
-p, --path [path]          specifies output path
```

You can provide some options to alter the behaviour of the crawler.

```JavaScript
var generator = new SitemapGenerator('example.com', {
  port: 80,
  restrictToBasepath: false,
  stripQuerystring: true,
});
```
### query

### port

Consider URLs with query strings like `http://www.example.com/?foo=bar` as individual sites and add them to the sitemap.

Type: `number`
Default: `80`

```BASH
$ sitemap-generator --query example.com
```

Set an alternative port number instead of the standard port `80`. Used for the initial request.
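For example, pointing the crawler at a development server on a non-standard port might look like this (a sketch; host and port are illustrative):

```JavaScript
var SitemapGenerator = require('sitemap-generator');

var generator = new SitemapGenerator('localhost', {
  port: 3000, // used for the initial request instead of the default 80
});
```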
### filename

### restrictToBasepath

Default: sitemap

Type: `boolean`
Default: `false`

Specify an alternate filename for the XML output file. The `.xml` file extension is optional; it will be added automatically.

If you specify a URL with a path (e.g. `example.com/foo/`) and this option is set to `true`, the crawler will only fetch URLs matching `example.com/foo/*`. Otherwise it could also fetch `example.com` if a link to that URL is present. See the sketch below.

```BASH
$ sitemap-generator --filename=sitemap-foo example.com
```
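Restricting the crawl to a sub-path as described above might look like this (a sketch; the domain is illustrative):

```JavaScript
var SitemapGenerator = require('sitemap-generator');

var generator = new SitemapGenerator('example.com/foo/', {
  restrictToBasepath: true, // only URLs matching example.com/foo/* are fetched
});
```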
### stripQueryString

Type: `boolean`
Default: `true`

Whether to treat URLs with query strings like `http://www.example.com/?foo=bar` as individual sites and add them to the sitemap.
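To keep query-string URLs as separate sitemap entries, the option can be disabled. Note that the options example above spells the key `stripQuerystring` (a sketch):

```JavaScript
var SitemapGenerator = require('sitemap-generator');

var generator = new SitemapGenerator('example.com', {
  stripQuerystring: false, // /page?foo=bar becomes its own sitemap entry
});
```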
## Events

The Sitemap Generator emits several events using Node's `EventEmitter`.

### `fetch`

Triggered when the crawler tries to fetch a resource. Passes the status and the URL as arguments. The status can be any HTTP status.

```JavaScript
generator.on('fetch', function (status, url) {
  // log url
});
```
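A common use is singling out failed fetches, for instance (a sketch, assuming the status argument compares like a numeric HTTP status code as described above):

```JavaScript
generator.on('fetch', function (status, url) {
  if (status !== 200) {
    console.log('Could not fetch ' + url + ' (status ' + status + ')');
  }
});
```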
### path

### `ignore`

Default: .

If a URL matches a disallow rule in the `robots.txt` file, this event is triggered. The URL will not be added to the sitemap. Passes the ignored URL as argument.

Specify an alternate output path for the generated sitemap. Default is the current working directory.

```JavaScript
generator.on('ignore', function (url) {
  // log ignored url
});
```

```BASH
$ sitemap-generator --path=../foo/bar example.com
```

### `clienterror`

Triggered if a client-side error occurred while fetching a URL. Passes the crawler error and additional error data as arguments.

```JavaScript
generator.on('clienterror', function (queueError, errorData) {
  // log error
});
```
### `done`

Triggered when the crawler has finished and the sitemap is created. Passes the created XML markup as callback argument. The second argument provides an object containing found, ignored, and faulty URLs.

```JavaScript
generator.on('done', function (sitemap, store) {
  // do something with the sitemap, e.g. save as file
});
```
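For example, the finished sitemap could be written to disk with Node's built-in `fs` module (a minimal sketch; the output filename is illustrative):

```JavaScript
var fs = require('fs');

generator.on('done', function (sitemap, store) {
  fs.writeFileSync('sitemap.xml', sitemap); // persist the generated XML markup
});

generator.start();
```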
@@ -1,64 +0,149 @@
var http = require('http');
/* eslint-disable */
module.exports = {
  '/': function (req, res) {
    res.writeHead(
      200,
      http.STATUS_CODES[200], {
        'Content-Type': 'text/html',
      });
    res.write('<a href=\'/site\'>Link 1</a><a href=\'/ignore\'>Link 2</a>');
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="/disallowed">disallowed</a>',
      '<a href="img.jpg">Image</a>',
      '<a href="/single">Single</a>',
    ].join('\n'));
    res.end();
  },
  '/ignore': function (req, res) {
    res.writeHead(
      200,
      http.STATUS_CODES[200], {
        'Content-Type': 'text/html',
      });
    res.write('this should be ignored!');
  '/relative': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="./">disallowed</a>',
    ].join('\n'));
    res.end();
  },
  '/site': function (req, res) {
    res.writeHead(
      200,
      http.STATUS_CODES[200], {
        'Content-Type': 'text/html',
      });
    res.write('<a href=\'/site/2\'>Link 2</a>');
  '/disallowed': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end();
  },
  '/site/2': function (req, res) {
    res.writeHead(
      200,
      http.STATUS_CODES[200], {
        'Content-Type': 'text/html',
      });
    res.write('<a href=\'/site/?foo=bar\''
  '/special': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="mailto:foo@bar.com">mail</a>',
      '<a href="tel:+12356">telephone</a>',
    ].join('\n'));
    res.end();
  },
  '/site/?foo=bar': function (req, res) {
    res.writeHead(
      200,
      http.STATUS_CODES[200], {
        'Content-Type': 'text/html',
      });
    res.write('query');
  '/single': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end();
  },
  '/robots.txt': function (req, res) {
    res.writeHead(
      200,
      http.STATUS_CODES[200], {
        'Content-Type': 'text/plain',
      });
  '/restricted': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="/">Home</a>',
    ].join('\n'));
    res.end();
  },
    res.write('User-agent: *\nDisallow: /ignore');
  '/relative/': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="../">Home</a>',
    ].join('\n'));
    res.end();
  },
  '/relative-2.html': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="../">Home</a>',
    ].join('\n'));
    res.end();
  },
  '/absolute': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="http://127.0.0.1:5173/single">Single</a>',
    ].join('\n'));
    res.end();
  },
  '/base': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<base href="http://127.0.0.1:5173/">',
      '<a href="single">Single</a>',
    ].join('\n'));
    res.end();
  },
  '/base-2': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<base href="/depth/">',
      '<a href="sub">Sub</a>',
    ].join('\n'));
    res.end();
  },
  '/depth/sub': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end();
  },
  '/protocol': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="//127.0.0.1:5173">Home</a>',
    ].join('\n'));
    res.end();
  },
  '/querystring': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<a href="/querystring?foo=bar">Home</a>',
    ].join('\n'));
    res.end();
  },
  '/querystring?foo=bar': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end();
  },
  '/robotsmeta': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<meta name="robots" content="index,nofollow">',
      '<a href="/robotsignored">ignored</a>',
    ].join('\n'));
    res.end();
  },
  '/robotsignored': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end();
  },
  '/noscripts': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write([
      '<script src="/script"></script>',
    ].join('\n'));
    res.end();
  },
  '/script': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end();
  },
  '/robots.txt': function (req, res) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write('User-agent: *\nDisallow: /disallowed');
    res.end();
  },
};
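The fixture module above maps request paths to plain `http` handlers. A minimal sketch of how a test runner might serve it (the require path is hypothetical; the port matches the absolute URLs in the fixtures):

```JavaScript
var http = require('http');
var routes = require('./fixtures/routes'); // hypothetical path to the module above

var server = http.createServer(function (req, res) {
  var handler = routes[req.url];
  if (handler) {
    handler(req, res); // delegate to the matching fixture route
  } else {
    res.writeHead(404);
    res.end();
  }
});

server.listen(5173, '127.0.0.1');
```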
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package.

Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package.

Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package.

Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package.

Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package.
+ Added cheerio@^0.20.0
+ Added lodash.assign@^4.0.8
+ Added lodash.forin@^4.1.0
+ Added robots@^0.9.4
+ Added abab@1.0.4 (transitive)
+ Added acorn@2.7.0 (transitive)
+ Added acorn-globals@1.0.9 (transitive)
+ Added boolbase@1.0.0 (transitive)
+ Added cheerio@0.20.0 (transitive)
+ Added core-util-is@1.0.3 (transitive)
+ Added css-select@1.2.0 (transitive)
+ Added css-what@2.1.3 (transitive)
+ Added cssom@0.3.8 (transitive)
+ Added cssstyle@0.2.37 (transitive)
+ Added deep-is@0.1.4 (transitive)
+ Added dom-serializer@0.1.1 (transitive)
+ Added domelementtype@1.3.1 (transitive)
+ Added domhandler@2.3.0 (transitive)
+ Added domutils@1.5.1 (transitive)
+ Added entities@1.0.0, 1.1.2 (transitive)
+ Added escodegen@1.14.3 (transitive)
+ Added esprima@4.0.1 (transitive)
+ Added estraverse@4.3.0 (transitive)
+ Added esutils@2.0.3 (transitive)
+ Added fast-levenshtein@2.0.6 (transitive)
+ Added htmlparser2@3.8.3 (transitive)
+ Added iconv-lite@0.4.24 (transitive)
+ Added inherits@2.0.4 (transitive)
+ Added isarray@0.0.1 (transitive)
+ Added jsdom@7.2.2 (transitive)
+ Added levn@0.3.0 (transitive)
+ Added lodash.assign@4.2.0 (transitive)
+ Added lodash.forin@4.4.0 (transitive)
+ Added nth-check@1.0.2 (transitive)
+ Added nwmatcher@1.4.4 (transitive)
+ Added optionator@0.8.3 (transitive)
+ Added parse5@1.5.1 (transitive)
+ Added prelude-ls@1.1.2 (transitive)
+ Added readable-stream@1.1.14 (transitive)
+ Added robots@0.9.5 (transitive)
+ Added sax@1.4.1 (transitive)
+ Added simplecrawler@0.7.0 (transitive)
+ Added source-map@0.6.1 (transitive)
+ Added string_decoder@0.10.31 (transitive)
+ Added symbol-tree@3.2.4 (transitive)
+ Added tr46@0.0.3 (transitive)
+ Added type-check@0.3.2 (transitive)
+ Added webidl-conversions@2.0.1 (transitive)
+ Added whatwg-url-compat@0.6.5 (transitive)
+ Added word-wrap@1.2.5 (transitive)
+ Added xml-name-validator@2.0.1 (transitive)
+ Added xmlbuilder@8.2.2 (transitive)
- Removed chalk@^1.1.1
- Removed commander@^2.9.0
- Removed lodash@^3.10.1
- Removed request@^2.67.0
- Removed robots-parser@^1.0.0
- Removed url-parse@^1.0.5
- Removed ansi-regex@2.1.1 (transitive)
- Removed ansi-styles@2.2.1 (transitive)
- Removed chalk@1.1.3 (transitive)
- Removed commander@2.20.3 (transitive)
- Removed escape-string-regexp@1.0.5 (transitive)
- Removed has-ansi@2.0.0 (transitive)
- Removed lodash@3.10.1 (transitive)
- Removed querystringify@2.2.0 (transitive)
- Removed requires-port@1.0.0 (transitive)
- Removed robots-parser@1.0.2 (transitive)
- Removed simplecrawler@0.5.4 (transitive)
- Removed strip-ansi@3.0.1 (transitive)
- Removed supports-color@2.0.0 (transitive)
- Removed url-parse@1.5.10 (transitive)
- Removed xmlbuilder@4.2.1 (transitive)
Updated simplecrawler@^0.7.0
Updated xmlbuilder@^8.2.2