
sitemap-generator

Maintainers: 1 · Versions: 61


sitemap-generator - npm Package Compare versions

Comparing version 3.1.1 to 4.0.0

SitemapGenerator.js


.eslintrc.json
 {
-  "extends": "airbnb/base",
+  "extends": "graubnla/legacy",
   "rules": {
     "no-console": 0,
     "no-var": 0,
     "func-names": 0,
-    "object-shorthand": 0
+    "vars-on-top": 0
   }
 }
package.json
 {
   "name": "sitemap-generator",
-  "version": "3.1.1",
-  "description": "Create xml sitemaps from the command line.",
-  "homepage": "https://github.com/lgraubner/node-sitemap-generator",
+  "version": "4.0.0",
+  "description": "Easily create XML sitemaps for your website.",
+  "homepage": "https://github.com/lgraubner/sitemap-generator",
   "author": {
@@ -14,2 +14,3 @@ "name": "Lars Graubner",
   "xml",
+  "sitemap.xml",
   "generator",
@@ -21,37 +22,31 @@ "crawler",
   ],
-  "main": "cli.js",
+  "main": "SitemapGenerator.js",
   "repository": {
     "type": "git",
-    "url": "https://github.com/lgraubner/node-sitemap-generator.git"
+    "url": "https://github.com/lgraubner/sitemap-generator.git"
   },
   "bugs": {
-    "url": "https://github.com/lgraubner/node-sitemap-generator/issues"
+    "url": "https://github.com/lgraubner/sitemap-generator/issues"
   },
   "dependencies": {
-    "simplecrawler": "^0.5.4",
-    "lodash": "^3.10.1",
-    "xmlbuilder": "^4.2.0",
-    "commander": "^2.9.0",
-    "chalk": "^1.1.1",
-    "url-parse": "^1.0.5",
-    "robots-parser": "^1.0.0",
-    "request": "^2.67.0"
+    "cheerio": "^0.20.0",
+    "lodash.assign": "^4.0.8",
+    "lodash.forin": "^4.1.0",
+    "robots": "^0.9.4",
+    "simplecrawler": "^0.7.0",
+    "xmlbuilder": "^8.2.2"
   },
-  "preferGlobal": true,
   "engines": {
     "node": ">=0.12"
   },
-  "bin": {
-    "sitemap-generator": "cli.js"
-  },
   "license": "MIT",
   "devDependencies": {
-    "chai": "^3.4.1",
-    "eslint": "^1.10.3",
-    "eslint-config-airbnb": "^3.1.0",
-    "mocha": "^2.3.4"
+    "ava": "^0.14.0",
+    "eslint": "^2.9.0",
+    "eslint-config-graubnla": "^2.0.2",
+    "lodash.isobject": "^3.0.2"
   },
   "scripts": {
-    "test": "eslint index.js lib/** && NODE_ENV=development mocha test"
+    "test": "eslint SitemapGenerator.js && ava test/all.js"
   }
 }
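The manifest changes above capture the breaking change of this release: the `bin` and `preferGlobal` fields disappear and `main` moves from `cli.js` to `SitemapGenerator.js`, so the package is now consumed as a library rather than a global CLI. A minimal sketch of what that means for upgrading consumers (`example.com` is a placeholder host):

```JavaScript
// v3.x: installed globally and invoked from the shell
//   $ npm install -g sitemap-generator
//   $ sitemap-generator example.com

// v4.x: installed locally and required via the new "main" entry point
var SitemapGenerator = require('sitemap-generator');

var generator = new SitemapGenerator('example.com'); // placeholder host
generator.start();
```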

README.md
@@ -1,67 +1,108 @@
-# Node Sitemap Generator
+# Sitemap Generator
-[![Travis](https://img.shields.io/travis/lgraubner/node-sitemap-generator.svg)](https://travis-ci.org/lgraubner/node-sitemap-generator) [![David](https://img.shields.io/david/lgraubner/node-sitemap-generator.svg)](https://david-dm.org/lgraubner/node-sitemap-generator) [![David Dev](https://img.shields.io/david/dev/lgraubner/node-sitemap-generator.svg)](https://david-dm.org/lgraubner/node-sitemap-generator#info=devDependencies) [![npm](https://img.shields.io/npm/v/sitemap-generator.svg)](https://www.npmjs.com/package/sitemap-generator)
+[![Travis](https://img.shields.io/travis/lgraubner/sitemap-generator.svg)](https://travis-ci.org/lgraubner/sitemap-generator) [![David](https://img.shields.io/david/lgraubner/sitemap-generator.svg)](https://david-dm.org/lgraubner/sitemap-generator) [![David Dev](https://img.shields.io/david/dev/lgraubner/sitemap-generator.svg)](https://david-dm.org/lgraubner/sitemap-generator#info=devDependencies) [![npm](https://img.shields.io/npm/v/sitemap-generator-cli.svg)](https://www.npmjs.com/package/sitemap-generator)
-> Create xml sitemaps from the command line.
+> Easily create XML sitemaps for your website.
-![](sitemap-generator.gif)
 ## Installation
 ```BASH
-$ npm install -g sitemap-generator
+$ npm install -S sitemap-generator
 ```
 ## Usage
-```BASH
-$ sitemap-generator [options] <url>
+```JavaScript
+var SitemapGenerator = require('sitemap-generator');
+// create generator
+var generator = new SitemapGenerator('example.com');
+// register event listeners
+generator.on('done', function (sitemap) {
+  console.log(sitemap); // => prints xml sitemap
+});
+// start the crawler
+generator.start();
 ```
-The crawler will fetch all sites matching folder URLs and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` will be taken into account and its rules applied for any URL to decide if it should be added to the sitemap.
+The crawler will fetch all folder URL pages and file types [parsed by Google](https://support.google.com/webmasters/answer/35287?hl=en). If present, the `robots.txt` will be taken into account and its rules applied for each URL to decide if it should be added to the sitemap. The crawler will also not fetch URLs from a page if a robots meta tag with the value `nofollow` is present, and it is able to apply the `base` value to found links.
-***Tip***: Omit the URL protocol, the crawler will detect the right one.
+The protocol can be omitted if the domain uses `http` or redirects to `https` are set up.
-**Important**: Executing the sitemap-generator with sites using an HTML `base` tag will not work in most cases, as it is not parsed by the crawler.
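As a concrete illustration of the robots handling added in v4, excluded URLs surface through the `ignore` event documented below; a minimal sketch, with `example.com` as a placeholder host:

```JavaScript
var SitemapGenerator = require('sitemap-generator');

var generator = new SitemapGenerator('example.com'); // protocol may be omitted

// URLs matching a robots.txt disallow rule are reported instead of crawled
generator.on('ignore', function (url) {
  console.log('skipped by robots.txt:', url);
});

generator.on('done', function (sitemap) {
  console.log(sitemap); // XML without the ignored URLs
});

generator.start();
```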
 ## Options
-```BASH
-$ sitemap-generator --help
-Usage: sitemap-generator [options] <url>
-Options:
-  -h, --help                 output usage information
-  -V, --version              output the version number
-  -q, --query                consider query string
-  -f, --filename [filename]  sets output filename
-  -p, --path [path]          specifies output path
+You can provide some options to alter the behaviour of the crawler.
+```JavaScript
+var generator = new SitemapGenerator('example.com', {
+  port: 80,
+  restrictToBasepath: false,
+  stripQuerystring: true,
+});
 ```
-### query
+### port
-Consider URLs with query strings like `http://www.example.com/?foo=bar` as individual sites and add them to the sitemap.
+Type: `number`
+Default: `80`
-```BASH
-$ sitemap-generator --query example.com
-```
+Set an alternative port number instead of the standard port `80`. Used for the initial request.
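For example, a sketch pointing the initial request at a development server on a non-standard port (8080 is an arbitrary value):

```JavaScript
var generator = new SitemapGenerator('example.com', {
  port: 8080, // initial request goes to example.com:8080
});
```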
-### filename
+### restrictToBasepath
-Default: sitemap
+Type: `boolean`
+Default: `false`
-Specify an alternate filename for the XML output file. The `.xml` file extension is optional; it will be added automatically.
+If you specify a URL with a path (e.g. `example.com/foo/`) and this option is set to `true`, the crawler will only fetch URLs matching `example.com/foo/*`. Otherwise it could also fetch `example.com` in case a link to this URL is provided.
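A sketch of the scenario described above, assuming the site lives under `example.com/foo/`:

```JavaScript
// Only URLs matching example.com/foo/* are fetched; a link back to
// example.com outside the base path would not be followed.
var generator = new SitemapGenerator('example.com/foo/', {
  restrictToBasepath: true,
});
```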
-```BASH
-$ sitemap-generator --filename=sitemap-foo example.com
-```
+### stripQuerystring
+Type: `boolean`
+Default: `true`
+Whether to treat URLs with query strings like `http://www.example.com/?foo=bar` as individual sites and add them to the sitemap.
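To keep query-string URLs as individual sitemap entries, the default would be disabled; a sketch:

```JavaScript
// /?foo=bar and / are now treated as separate entries in the sitemap
var generator = new SitemapGenerator('example.com', {
  stripQuerystring: false,
});
```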
+## Events
+The Sitemap Generator emits several events using Node's `EventEmitter`.
+### `fetch`
+Triggered when the crawler tries to fetch a resource. Passes the status and the URL as arguments. The status can be any HTTP status.
+```JavaScript
+generator.on('fetch', function (status, url) {
+  // log url
+});
+```
-### path
+### `ignore`
-Default: .
-Specify an alternate output path for the generated sitemap. Default is the current working directory.
-```BASH
-$ sitemap-generator --path=../foo/bar example.com
-```
+If a URL matches a disallow rule in the `robots.txt` file, this event is triggered. The URL will not be added to the sitemap. Passes the ignored URL as argument.
+```JavaScript
+generator.on('ignore', function (url) {
+  // log ignored url
+});
+```
+### `clienterror`
+Thrown if there was an error on the client side while fetching a URL. Passes the crawler error and additional error data as arguments.
+```JavaScript
+generator.on('clienterror', function (queueError, errorData) {
+  // log error
+});
+```
+### `done`
+Triggered when the crawler has finished and the sitemap is created. Passes the created XML markup as callback argument. The second argument provides an object containing found URLs, ignored URLs, and faulty URLs.
+```JavaScript
+generator.on('done', function (sitemap, store) {
+  // do something with the sitemap, e.g. save as file
+});
+```
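Putting the `done` event to practical use, a minimal end-to-end sketch that writes the generated XML to disk (`sitemap.xml` is an arbitrary output path for this example):

```JavaScript
var fs = require('fs');
var SitemapGenerator = require('sitemap-generator');

var generator = new SitemapGenerator('example.com');

generator.on('done', function (sitemap, store) {
  // persist the sitemap; `store` also lists found, ignored and faulty URLs
  fs.writeFile('sitemap.xml', sitemap, function (err) {
    if (err) throw err;
    console.log('sitemap.xml written');
  });
});

generator.start();
```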

@@ -1,64 +1,149 @@
-var http = require('http');
+/* eslint-disable */
 module.exports = {
   '/': function (req, res) {
-    res.writeHead(
-      200,
-      http.STATUS_CODES[200], {
-        'Content-Type': 'text/html',
-      });
-    res.write('<a href=\'/site\'>Link 1</a><a href=\'/ignore\'>Link 2</a>');
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="/disallowed">disallowed</a>',
+      '<a href="img.jpg">Image</a>',
+      '<a href="/single">Single</a>',
+    ].join('\n'));
     res.end();
   },
-  '/ignore': function (req, res) {
-    res.writeHead(
-      200,
-      http.STATUS_CODES[200], {
-        'Content-Type': 'text/html',
-      });
-    res.write('this should be ignored!');
+  '/relative': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="./">disallowed</a>',
+    ].join('\n'));
     res.end();
   },
-  '/site': function (req, res) {
-    res.writeHead(
-      200,
-      http.STATUS_CODES[200], {
-        'Content-Type': 'text/html',
-      });
-    res.write('<a href=\'/site/2\'>Link 2</a>');
+  '/disallowed': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
     res.end();
   },
-  '/site/2': function (req, res) {
-    res.writeHead(
-      200,
-      http.STATUS_CODES[200], {
-        'Content-Type': 'text/html',
-      });
-    res.write('<a href=\'/site/?foo=bar\'');
+  '/special': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="mailto:foo@bar.com">mail</a>',
+      '<a href="tel:+12356">telephone</a>',
+    ].join('\n'));
     res.end();
   },
-  '/site/?foo=bar': function (req, res) {
-    res.writeHead(
-      200,
-      http.STATUS_CODES[200], {
-        'Content-Type': 'text/html',
-      });
-    res.write('query');
+  '/single': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
     res.end();
   },
-  '/robots.txt': function (req, res) {
-    res.writeHead(
-      200,
-      http.STATUS_CODES[200], {
-        'Content-Type': 'text/plain',
-      });
-    res.write('User-agent: *\nDisallow: /ignore');
+  '/restricted': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="/">Home</a>',
+    ].join('\n'));
     res.end();
   },
+  '/relative/': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="../">Home</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/relative-2.html': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="../">Home</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/absolute': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="http://127.0.0.1:5173/single">Single</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/base': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<base href="http://127.0.0.1:5173/">',
+      '<a href="single">Single</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/base-2': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<base href="/depth/">',
+      '<a href="sub">Sub</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/depth/sub': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.end();
+  },
+  '/protocol': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="//127.0.0.1:5173">Home</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/querystring': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<a href="/querystring?foo=bar">Home</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/querystring?foo=bar': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.end();
+  },
+  '/robotsmeta': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<meta name="robots" content="index,nofollow">',
+      '<a href="/robotsignored">ignored</a>',
+    ].join('\n'));
+    res.end();
+  },
+  '/robotsignored': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.end();
+  },
+  '/noscripts': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write([
+      '<script src="/script"></script>',
+    ].join('\n'));
+    res.end();
+  },
+  '/script': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.end();
+  },
+  '/robots.txt': function (req, res) {
+    res.writeHead(200, { 'Content-Type': 'text/html' });
+    res.write('User-agent: *\nDisallow: /disallowed');
+    res.end();
+  },
 };

