Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

sitemapper

Package Overview
Dependencies
Maintainers
1
Versions
53
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sitemapper - npm Package Compare versions

Comparing version 3.1.7 to 3.1.8

341

lib/assets/sitemapper.js

@@ -1,341 +0,2 @@

"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports["default"] = void 0;
var _xml2jsEs6Promise = _interopRequireDefault(require("xml2js-es6-promise"));
var _got = _interopRequireDefault(require("got"));
/**
 * Normalise the result of `require()` so the default export is always
 * reachable as `.default`.
 *
 * @param {*} obj - the value returned by `require()`
 * @returns {Object} `obj` itself when it is flagged with `__esModule`,
 * otherwise a new wrapper object `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { "default": obj };
}
/**
 * Guard used at the top of a compiled constructor: throws unless `instance`
 * was created from `Constructor` (i.e. the constructor was called with `new`).
 *
 * @param {*} instance - the `this` value seen by the constructor
 * @param {Function} Constructor - the constructor being invoked
 * @throws {TypeError} when `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  var calledWithNew = instance instanceof Constructor;
  if (calledWithNew) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Define every descriptor in `props` on `target`, using `descriptor.key` as
 * the property name.
 *
 * Each descriptor object is normalised in place before it is applied:
 * `enumerable` defaults to `false`, `configurable` is forced to `true`, and
 * data descriptors (those with a `value`) are made `writable`.
 *
 * @param {Object} target - object receiving the properties
 * @param {Array<Object>} props - property descriptors, each carrying a `key`
 */
function _defineProperties(target, props) {
  var index = 0;
  while (index < props.length) {
    var entry = props[index];
    entry.enumerable = entry.enumerable || false;
    entry.configurable = true;
    if ("value" in entry) {
      entry.writable = true;
    }
    Object.defineProperty(target, entry.key, entry);
    index += 1;
  }
}
/**
 * Attach prototype and static members to a compiled class constructor.
 *
 * @param {Function} Constructor - the constructor to decorate
 * @param {Array<Object>} [protoProps] - descriptors defined on `Constructor.prototype`
 * @param {Array<Object>} [staticProps] - descriptors defined on `Constructor` itself
 * @returns {Function} the same `Constructor`
 */
function _createClass(Constructor, protoProps, staticProps) {
  // The prototype is only looked up when there are instance members to define.
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }
  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }
  return Constructor;
}
/**
* @typedef {Object} Sitemapper
*/
var Sitemapper = /*#__PURE__*/function () {
  // Backing store for the class-level (static) `timeout` / `url` accessors.
  // They used to read and write `this.timeout` / `this.url` on the constructor,
  // which re-entered the same accessor and overflowed the stack.
  // These values are NOT consulted by instances.
  const classLevelSettings = { timeout: undefined, url: undefined };

  class Sitemapper {
    /**
     * Construct the Sitemapper class
     *
     * @param {Object} [options] - options to set
     * @param {string} [options.url] - the Sitemap url (e.g https://wp.seantburke.com/sitemap.xml)
     * @param {Timeout} [options.timeout] - @see {timeout}; falls back to 15000 when falsy
     * @param {Object} [options.requestHeaders] - headers sent with every request
     *
     * @example let sitemap = new Sitemapper({
     *   url: 'https://wp.seantburke.com/sitemap.xml',
     *   timeout: 15000
     * });
     */
    constructor(options) {
      const settings = options || { requestHeaders: {} };
      this.url = settings.url;
      this.timeout = settings.timeout || 15000;
      // url -> timer handle for the request currently in flight for that url
      this.timeoutTable = {};
      this.requestHeaders = settings.requestHeaders;
    }

    /**
     * Gets the sites from a sitemap.xml with a given URL
     *
     * @public
     * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
     * @returns {Promise<SitesData>}
     * @example sitemapper.fetch('example.xml')
     *  .then((sites) => console.log(sites));
     */
    fetch(url = this.url) {
      return this.crawl(url).then((sites) => ({ url, sites }));
    }

    /**
     * Requests the URL and uses xmlParse to parse through and find the data
     *
     * Never rejects for request or parse failures: they are reported through
     * the `error` field of the resolved value.
     *
     * @private
     * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
     * @returns {Promise<ParseData>}
     */
    parse(url = this.url) {
      // NOTE(review): `resolveWithFullResponse` and `gzip` look like
      // request-promise options; confirm how got treats them.
      const requestOptions = {
        method: 'GET',
        resolveWithFullResponse: true,
        gzip: true,
        headers: this.requestHeaders
      };
      // An explicit promise is needed here because the timeout timer must be
      // able to resolve it from outside the request chain.
      return new Promise((resolve) => {
        const requester = (0, _got["default"])(url, requestOptions);
        requester.then((response) => {
          if (!response || response.statusCode !== 200) {
            clearTimeout(this.timeoutTable[url]);
            return {
              error: response ? response.error : undefined,
              data: response
            };
          }
          return Promise.resolve((0, _xml2jsEs6Promise["default"])(response.body)).then((data) => ({
            error: null,
            data
          }));
        }).then(resolve)["catch"]((failure) => resolve({
          // Rejections normally carry no `.error` property, so fall back to the
          // rejection itself instead of reporting `error: undefined`.
          error: failure && failure.error || failure,
          data: {}
        }));
        this.initializeTimeout(url, requester, resolve);
      });
    }

    /**
     * Timeouts are necessary for large xml trees. This will cancel the call if the request is taking
     * too long, but will still allow the promises to resolve.
     *
     * @private
     * @param {string} url - url to use as a hash in the timeoutTable
     * @param {Promise} requester - the promise that creates the web request to the url
     * @param {Function} callback - the resolve method is used here to resolve the parent promise
     */
    initializeTimeout(url, requester, callback) {
      // this resolves instead of rejects in order to allow other requests to continue
      this.timeoutTable[url] = setTimeout(() => {
        requester.cancel();
        callback({
          error: `request timed out after ${this.timeout} milliseconds`,
          data: {}
        });
      }, this.timeout);
    }

    /**
     * Recursive function that will go through a sitemaps tree and get all the sites
     *
     * Always resolves: any failure while reading a sitemap yields an empty list
     * for that sitemap so the remaining ones can still be collected.
     *
     * @private
     * @recursive
     * @param {string} url - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
     * @returns {Promise<SitesArray>}
     */
    crawl(url) {
      return this.parse(url).then(({ error, data }) => {
        // The promise resolved, remove the timeout
        clearTimeout(this.timeoutTable[url]);
        if (error) {
          // Fail silently
          return [];
        }
        if (data && data.urlset && data.urlset.url) {
          return data.urlset.url.map((site) => site.loc && site.loc[0]);
        }
        if (data && data.sitemapindex) {
          // An index may have no <sitemap> children at all.
          const entries = data.sitemapindex.sitemap || [];
          // Skip entries without a <loc>: crawling `undefined` would fall back
          // to this.url and re-crawl the root sitemap.
          const childUrls = entries.map((entry) => entry.loc && entry.loc[0]).filter(Boolean);
          return Promise.all(childUrls.map((child) => this.crawl(child))).then(
            (results) => results.reduce((all, batch) => all.concat(batch), [])
          );
        }
        // Fail silently
        return [];
      })["catch"](() => []); // unexpected shape or exception: fail silently instead of hanging
    }

    /**
     * Gets the sites from a sitemap.xml with a given URL
     *
     * The callback is invoked once the fetch has settled. On success the first
     * argument is an empty object (not null), kept for backward compatibility.
     *
     * @deprecated use .fetch()
     * @param {string} [url] - url to query
     * @param {getSitesCallback} callback - callback for sites and error
     * @returns {Promise<*>} resolves with whatever the callback returns
     * @throws {TypeError} when callback is not a function
     */
    getSites(url = this.url, callback) {
      console.warn( // eslint-disable-line no-console
        '\r\nWarning:', 'function .getSites() is deprecated, please use the function .fetch()\r\n');
      if (typeof callback !== 'function') {
        throw new TypeError('callback must be a function');
      }
      return this.fetch(url).then(
        (response) => callback({}, response.sites),
        (error) => callback(error, [])
      );
    }

    /**
     * Get the class-level timeout
     *
     * @example console.log(Sitemapper.timeout);
     * @returns {Timeout}
     */
    static get timeout() {
      return classLevelSettings.timeout;
    }

    /**
     * Set the class-level timeout
     *
     * @public
     * @param {Timeout} duration
     * @example Sitemapper.timeout = 15000; // 15 seconds
     */
    static set timeout(duration) {
      classLevelSettings.timeout = duration;
    }

    /**
     * Set the class-level url
     *
     * @param {string} url - url for making requests. Should be a link to a sitemaps.xml
     * @example Sitemapper.url = 'https://wp.seantburke.com/sitemap.xml'
     */
    static set url(url) {
      classLevelSettings.url = url;
    }

    /**
     * Get the class-level url
     *
     * @returns {string}
     * @example console.log(Sitemapper.url)
     */
    static get url() {
      return classLevelSettings.url;
    }
  }

  return Sitemapper;
}();
/**
* Callback for the getSites method
*
* @callback getSitesCallback
* @param {Object} error - error from callback
* @param {Array} sites - an Array of sitemaps
*/
/**
* Timeout in milliseconds
*
* @typedef {Number} Timeout
* the number of milliseconds before all requests timeout. The promises will still resolve so
* you'll still receive parts of the request, but maybe not all urls
* default is 15000 which is 15 seconds
*/
/**
* Resolve handler type for the promise in this.parse()
*
* @typedef {Object} ParseData
*
* @property {Error} error that either comes from `xmlParse` or `request` or custom error
* @property {Object} data
* @property {string} data.url - URL of sitemap
* @property {Array} data.urlset - Array of returned URLs
* @property {string} data.urlset.url - single Url
* @property {Object} data.sitemapindex - index of sitemap
* @property {string} data.sitemapindex.sitemap - Sitemap
* @example {
* error: "There was an error!"
* data: {
* url: 'linkedin.com',
* urlset: [{
* url: 'www.linkedin.com/project1'
* },[{
* url: 'www.linkedin.com/project2'
* }]
* }
* }
*/
/**
* Resolve handler type for the promise in this.parse()
*
* @typedef {Object} SitesData
*
* @property {string} url - the original url used to query the data
* @property {SitesArray} sites
* @example {
* url: 'linkedin.com/sitemap.xml',
* sites: [
* 'linkedin.com/project1',
* 'linkedin.com/project2'
* ]
* }
*/
/**
* An array of urls
*
* @typedef {String[]} SitesArray
* @example [
* 'www.google.com',
* 'www.linkedin.com'
* ]
*/
// Publish the class as the ES-module style default export.
exports["default"] = Sitemapper;
// Replace module.exports with the class itself, so module.exports is the
// constructor rather than the original exports object.
module.exports = exports.default;
// module.exports is now the class, so this adds a `default` property on the
// class that points back at itself; `.default` lookups keep working.
module.exports.default = exports.default;
"use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.default = void 0;

var _xml2jsEs6Promise = _interopRequireDefault(require("xml2js-es6-promise"));
var _got = _interopRequireDefault(require("got"));

/**
 * Normalise a required module so its default export is reachable as `.default`.
 *
 * @param {*} obj - the value returned by `require()`
 * @returns {Object} `obj` when flagged with `__esModule`, else `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

// Backing store for the class-level (static) accessors. They used to read and
// write the very property they define on the constructor, which recursed until
// the stack overflowed. These values are NOT consulted by instances.
const sitemapperClassSettings = { timeout: undefined, url: undefined, debug: undefined };

class Sitemapper {
  /**
   * Construct the Sitemapper class
   *
   * @param {Object} [options] - options to set
   * @param {string} [options.url] - the Sitemap url
   * @param {number} [options.timeout] - milliseconds before a request is cancelled; 15000 when falsy
   * @param {Object} [options.requestHeaders] - headers sent with every request
   * @param {boolean} [options.debug] - when truthy, progress and errors are written to the console
   */
  constructor(options) {
    const settings = options || { requestHeaders: {} };
    this.url = settings.url;
    this.timeout = settings.timeout || 15000;
    // url -> timer handle for the request currently in flight for that url
    this.timeoutTable = {};
    this.requestHeaders = settings.requestHeaders;
    this.debug = settings.debug;
  }

  /**
   * Gets the sites from a sitemap with a given URL
   *
   * @public
   * @param {string} [url] - the Sitemaps url; defaults to `this.url`
   * @returns {Promise<{url: string, sites: string[]}>}
   */
  fetch(url = this.url) {
    return this.crawl(url).then((sites) => ({ url, sites }));
  }

  /** @returns {number} the class-level timeout */
  static get timeout() {
    return sitemapperClassSettings.timeout;
  }

  /** @param {number} duration - the class-level timeout in milliseconds */
  static set timeout(duration) {
    sitemapperClassSettings.timeout = duration;
  }

  /** @param {string} url - the class-level sitemap url */
  static set url(url) {
    sitemapperClassSettings.url = url;
  }

  /** @returns {string} the class-level sitemap url */
  static get url() {
    return sitemapperClassSettings.url;
  }

  /** @param {boolean} option - the class-level debug flag */
  static set debug(option) {
    sitemapperClassSettings.debug = option;
  }

  /** @returns {boolean} the class-level debug flag */
  static get debug() {
    return sitemapperClassSettings.debug;
  }

  /**
   * Requests the URL and parses the XML body.
   *
   * Never rejects for request or parse failures: they are reported through
   * the `error` field of the resolved value.
   *
   * @private
   * @param {string} [url] - the Sitemaps url; defaults to `this.url`
   * @returns {Promise<{error: *, data: *}>}
   */
  parse(url = this.url) {
    // NOTE(review): `resolveWithFullResponse` and `gzip` look like
    // request-promise options; confirm how got treats them.
    const requestOptions = {
      method: "GET",
      resolveWithFullResponse: true,
      gzip: true,
      headers: this.requestHeaders,
    };
    // An explicit promise is needed because the timeout timer must be able to
    // resolve it from outside the request chain.
    return new Promise((resolve) => {
      const requester = (0, _got.default)(url, requestOptions);
      requester
        .then((response) => {
          if (!response || response.statusCode !== 200) {
            clearTimeout(this.timeoutTable[url]);
            return { error: response ? response.error : undefined, data: response };
          }
          return Promise.resolve((0, _xml2jsEs6Promise.default)(response.body)).then((data) => ({
            error: null,
            data,
          }));
        })
        .then(resolve)
        .catch((failure) =>
          resolve({
            // Rejections normally carry no `.error` property; fall back to the
            // rejection itself instead of reporting `error: undefined`.
            error: (failure && failure.error) || failure,
            data: failure,
          })
        );
      this.initializeTimeout(url, requester, resolve);
    });
  }

  /**
   * Starts the timer that cancels a slow request. The parent promise is
   * resolved (not rejected) so other requests can continue.
   *
   * @private
   * @param {string} url - key used in the timeoutTable
   * @param {Promise} requester - the in-flight request; must expose `cancel()`
   * @param {Function} callback - resolve function of the parent promise
   */
  initializeTimeout(url, requester, callback) {
    this.timeoutTable[url] = setTimeout(() => {
      requester.cancel();
      if (this.debug) {
        console.debug("crawl timed out");
      }
      callback({
        error: `request timed out after ${this.timeout} milliseconds for url: '${url}'`,
        data: {},
      });
    }, this.timeout);
  }

  /**
   * Recursively walks a sitemap tree and collects every site url.
   *
   * Always resolves: any failure while reading a sitemap yields an empty list
   * for that sitemap so the remaining ones can still be collected.
   *
   * @private
   * @param {string} url - the Sitemaps url
   * @returns {Promise<string[]>}
   */
  crawl(url) {
    return this.parse(url)
      .then(({ error, data }) => {
        // The request settled, remove its timeout.
        clearTimeout(this.timeoutTable[url]);
        if (error) {
          if (this.debug) {
            console.error(`Error occurred during "crawl('${url}')":\n\r Error: ${error}`);
          }
          return [];
        }
        if (data && data.urlset && data.urlset.url) {
          if (this.debug) {
            console.debug(`Urlset found during "crawl('${url}')"`);
          }
          return data.urlset.url.map((site) => site.loc && site.loc[0]);
        }
        if (data && data.sitemapindex) {
          if (this.debug) {
            console.debug(`Additional sitemap found during "crawl('${url}')"`);
          }
          // An index may have no <sitemap> children at all.
          const entries = data.sitemapindex.sitemap || [];
          // Skip entries without a <loc>: crawling `undefined` would fall back
          // to this.url and re-crawl the root sitemap.
          const childUrls = entries.map((entry) => entry.loc && entry.loc[0]).filter(Boolean);
          return Promise.all(childUrls.map((child) => this.crawl(child))).then((results) =>
            results.reduce((all, batch) => all.concat(batch), [])
          );
        }
        if (this.debug) {
          console.error(`Unknown state during "crawl(${url})":`, error, data);
        }
        return [];
      })
      .catch((failure) => {
        // Unexpected shape or exception: report it in debug mode and resolve
        // empty instead of leaving the caller pending forever.
        if (this.debug) {
          console.error(`Unexpected failure during "crawl('${url}')":`, failure);
        }
        return [];
      });
  }

  /**
   * Gets the sites from a sitemap with a given URL
   *
   * The callback is invoked once the fetch has settled. On success the first
   * argument is an empty object (not null), kept for backward compatibility.
   *
   * @deprecated use .fetch()
   * @param {string} [url] - url to query; defaults to `this.url`
   * @param {Function} callback - called with `(error, sites)`
   * @returns {Promise<*>} resolves with whatever the callback returns
   * @throws {TypeError} when callback is not a function
   */
  getSites(url = this.url, callback) {
    console.warn("\r\nWarning:", "function .getSites() is deprecated, please use the function .fetch()\r\n");
    if (typeof callback !== "function") {
      throw new TypeError("callback must be a function");
    }
    return this.fetch(url).then(
      (response) => callback({}, response.sites),
      (error) => callback(error, [])
    );
  }
}

exports.default = Sitemapper;
module.exports = exports.default;
module.exports.default = exports.default;
//# sourceMappingURL=sitemapper.js.map

@@ -1,18 +0,2 @@

"use strict";

// Example script: fetch Google's sitemap and print the discovered sites.
var _sitemapper = _interopRequireDefault(require("../assets/sitemapper.js"));

/**
 * Normalise a required module so its default export is reachable as `.default`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { "default": obj };
}

var googleSitemapOptions = {
  url: 'https://www.google.com/work/sitemap.xml',
  timeout: 15000 // 15 seconds
};
var Google = new _sitemapper["default"](googleSitemapOptions);

/* eslint-disable no-console */
var printGoogleSites = function (data) {
  return console.log(data.sites);
};
var printGoogleError = function (error) {
  return console.log(error);
};
/* eslint-enable no-console */

Google.fetch().then(printGoogleSites)["catch"](printGoogleError);
"use strict";

// Example script: fetch Google's sitemap (debug output off) and print the sites.
var _sitemapper = _interopRequireDefault(require("../assets/sitemapper.js"));

/**
 * Normalise a required module so its default export is reachable as `.default`.
 */
function _interopRequireDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}

var Google = new _sitemapper.default({
  url: "https://www.google.com/work/sitemap.xml",
  debug: false,
  timeout: 15000, // 15 seconds
});

Google.fetch()
  .then((result) => console.log(result.sites))
  .catch((failure) => console.log(failure));
//# sourceMappingURL=google.js.map

@@ -1,36 +0,2 @@

"use strict";

// Example script: crawl several public sitemaps and print what comes back.
var _sitemapper = _interopRequireDefault(require("../assets/sitemapper.js"));

/**
 * Normalise a required module so its default export is reachable as `.default`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { "default": obj };
}

// Shared console handlers for the requests below.
var logFailure = function (error) {
  return console.log(error);
};
var logPayload = function (data) {
  return console.log(data);
};

var sitemapper = new _sitemapper["default"]();
var Google = new _sitemapper["default"]({
  url: 'https://www.google.com/work/sitemap.xml',
  timeout: 15000 // 15 seconds
});

Google.fetch().then(function (data) {
  return console.log(data.sites);
})["catch"](logFailure);

// Shorten the timeout before issuing the remaining requests.
sitemapper.timeout = 5000;

sitemapper.fetch('https://wp.seantburke.com/sitemap.xml').then(function (result) {
  var url = result.url;
  var sites = result.sites;
  return console.log("url:".concat(url), 'sites:', sites);
})["catch"](logFailure);

sitemapper.fetch('http://www.cnn.com/sitemaps/sitemap-index.xml').then(logPayload)["catch"](logFailure);

sitemapper.fetch('http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml').then(logPayload)["catch"](logFailure);
"use strict";

// Example script: fetch one sitemap with debug output on and print the result.
var _sitemapper = _interopRequireDefault(require("../assets/sitemapper"));

/**
 * Normalise a required module so its default export is reachable as `.default`.
 */
function _interopRequireDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}

/**
 * Advance generator `gen` one step by calling `gen[key](arg)`.
 * If that call throws, `reject` receives the error. If the generator is done,
 * `resolve` receives the final value. Otherwise the yielded value is awaited
 * and its outcome is routed to `_next` (fulfilled) or `_throw` (rejected).
 */
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) {
  var info;
  var value;
  try {
    info = gen[key](arg);
    value = info.value;
  } catch (error) {
    reject(error);
    return;
  }
  if (info.done) {
    resolve(value);
  } else {
    Promise.resolve(value).then(_next, _throw);
  }
}

/**
 * Wrap generator function `fn` so that calling the wrapper returns a promise
 * settled by driving the generator to completion.
 */
function _asyncToGenerator(fn) {
  return function () {
    var self = this;
    var args = arguments;
    return new Promise(function (resolve, reject) {
      var gen = fn.apply(self, args);
      function _next(value) {
        asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value);
      }
      function _throw(err) {
        asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err);
      }
      _next(undefined);
    });
  };
}

var exampleURL = "https://www.walmart.com/sitemap_topic.xml";
var sitemapper = new _sitemapper.default({
  url: exampleURL,
  debug: true,
  timeout: 10000, // 10 seconds
});

_asyncToGenerator(function* () {
  try {
    var sitesData = yield sitemapper.fetch();
    console.log(sitesData);
  } catch (failure) {
    console.error(failure);
  }
})();
//# sourceMappingURL=index.js.map

37

package.json
{
"name": "sitemapper",
"version": "3.1.7",
"version": "3.1.8",
"description": "Parser for XML Sitemaps to be used with Robots.txt and web crawlers",

@@ -32,3 +32,3 @@ "keywords": [

"name": "Sean Thomas Burke",
"email": "sean@seantburke.com",
"email": "seantomburke@users.noreply.github.com",
"url": "http://www.seantburke.com"

@@ -39,3 +39,3 @@ },

"build": "npm run clean && npm run compile",
"start": "node lib/examples/index.js",
"start": "npm run build && node lib/examples/index.js",
"test": "npm run build && mocha ./lib/tests/*.js && npm run lint",

@@ -49,3 +49,3 @@ "lint": "eslint src",

"name": "Sean Thomas Burke",
"email": "sean@seantburke.com",
"email": "seantomburke@users.noreply.github.com",
"url": "http://www.seantburke.com"

@@ -62,17 +62,20 @@ }

"devDependencies": {
"@babel/cli": "^7.12.1",
"@babel/core": "^7.7.7",
"@babel/preset-env": "^7.7.7",
"@types/async": "^3.0.3",
"@babel/cli": "^7.12.8",
"@babel/core": "^7.12.9",
"@babel/preset-env": "^7.12.7",
"@babel/runtime": "^7.12.5",
"@types/async": "^3.2.4",
"@types/got": "^9.6.11",
"@types/is-url": "^1.2.28",
"@types/mocha": "^5.2.7",
"async": "^3.1.0",
"babel-plugin-add-module-exports": "^1.0.2",
"documentation": "^12.1.4",
"eslint": "^4.18.2",
"is-url": "^1.2.2",
"mocha": "^5.2.0",
"@types/mocha": "^8.0.4",
"async": "^3.2.0",
"babel-plugin-add-module-exports": "^1.0.4",
"babel-preset-minify": "^0.5.1",
"documentation": "^13.1.0",
"eslint": "^7.14.0",
"is-url": "^1.2.4",
"mocha": "^8.2.1",
"should": "^13.2.3",
"ts-node": "^8.6.1",
"typescript": "^3.9.7"
"ts-node": "^9.0.0",
"typescript": "^4.1.2"
},

@@ -79,0 +82,0 @@ "dependencies": {

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc