sitemapper
Advanced tools
Comparing version 3.1.7 to 3.1.8
@@ -1,341 +0,2 @@ | ||
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { | ||
value: true | ||
}); | ||
exports["default"] = void 0; | ||
var _xml2jsEs6Promise = _interopRequireDefault(require("xml2js-es6-promise")); | ||
var _got = _interopRequireDefault(require("got")); | ||
/**
 * Normalizes a required module so its default export can always be read from `.default`.
 *
 * @param {*} obj - the value returned by `require`
 * @returns {Object} `obj` itself when it is flagged as an ES module, otherwise `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { "default": obj };
}
/**
 * Guards a transpiled constructor against being invoked without `new`.
 *
 * @param {*} instance - the `this` value the constructor received
 * @param {Function} Constructor - the constructor being invoked
 * @throws {TypeError} when `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Installs a list of property descriptors on `target`.
 *
 * Each descriptor is updated in place before it is applied: `enumerable` falls back to
 * `false`, `configurable` is forced to `true`, and data descriptors (those with a `value`
 * key) are made writable.
 *
 * @param {Object} target - object that receives the properties
 * @param {Array<Object>} props - descriptors, each carrying the property name in `key`
 */
function _defineProperties(target, props) {
  var index = 0;
  while (index < props.length) {
    var descriptor = props[index];
    descriptor.enumerable = descriptor.enumerable || false;
    descriptor.configurable = true;
    if ("value" in descriptor) {
      descriptor.writable = true;
    }
    Object.defineProperty(target, descriptor.key, descriptor);
    index += 1;
  }
}
/**
 * Attaches prototype (instance) and static members to a transpiled constructor.
 *
 * @param {Function} Constructor - constructor to decorate
 * @param {Array<Object>} [protoProps] - descriptors installed on `Constructor.prototype`
 * @param {Array<Object>} [staticProps] - descriptors installed on `Constructor` itself
 * @returns {Function} the same `Constructor`
 */
function _createClass(Constructor, protoProps, staticProps) {
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }
  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }
  return Constructor;
}
/** | ||
* @typedef {Object} Sitemapper | ||
*/ | ||
/**
 * Crawls an XML sitemap (following nested sitemap indexes) and collects the page URLs it lists.
 *
 * `url`, `timeout` and `requestHeaders` are plain writable instance properties:
 *
 * @example sitemapper.timeout = 15000; // 15 seconds
 * @example sitemapper.url = 'https://wp.seantburke.com/sitemap.xml'
 *
 * The class previously also declared static `timeout`/`url` accessors that read and wrote
 * themselves, so any access to `Sitemapper.timeout` or `Sitemapper.url` overflowed the stack.
 * They are intentionally gone; instance-level usage is unaffected.
 */
class Sitemapper {
  /**
   * Construct the Sitemapper class
   *
   * @param {Object} [options] - options to set
   * @param {string} [options.url] - the Sitemap url (e.g https://wp.seantburke.com/sitemap.xml)
   * @param {Timeout} [options.timeout] - @see {timeout}; falls back to 15000 when omitted or falsy
   * @param {Object} [options.requestHeaders] - headers sent with every sitemap request
   *
   * @example let sitemap = new Sitemapper({
   *   url: 'https://wp.seantburke.com/sitemap.xml',
   *   timeout: 15000
   *  });
   */
  constructor(options) {
    const settings = options || { requestHeaders: {} };
    this.url = settings.url;
    this.timeout = settings.timeout || 15000;
    // Pending timeout handles, keyed by the url being requested.
    this.timeoutTable = {};
    this.requestHeaders = settings.requestHeaders;
  }

  /**
   * Gets the sites from a sitemap.xml with a given URL
   *
   * @public
   * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
   * @returns {Promise<SitesData>}
   * @example sitemapper.fetch('example.xml')
   *  .then((sites) => console.log(sites));
   */
  fetch(url = this.url) {
    return this.crawl(url).then((sites) => ({ url, sites }));
  }

  /**
   * Requests the URL and uses xmlParse to parse through and find the data
   *
   * The returned promise always resolves; failures are reported through the `error`
   * field of the resolved value rather than by rejecting.
   *
   * @private
   * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
   * @returns {Promise<ParseData>}
   */
  parse(url = this.url) {
    const requestOptions = {
      method: 'GET',
      resolveWithFullResponse: true,
      gzip: true,
      headers: this.requestHeaders
    };
    const request = _got["default"];
    const parseXml = _xml2jsEs6Promise["default"];

    // An explicit promise is needed here because the timeout must be able to settle the
    // result before the request chain does; whichever calls `resolve` first wins.
    return new Promise((resolve) => {
      const requester = request(url, requestOptions);

      requester
        .then((response) => {
          if (!response || response.statusCode !== 200) {
            clearTimeout(this.timeoutTable[url]);
            // `response` may be missing entirely, so guard the property read.
            return { error: response && response.error, data: response };
          }
          return parseXml(response.body).then((data) => ({ error: null, data }));
        })
        .catch((failure) => ({
          // Request and XML errors rarely carry an `.error` field; report the failure
          // itself so callers can tell that something went wrong.
          error: failure && failure.error ? failure.error : failure,
          data: {}
        }))
        .then(resolve);

      this.initializeTimeout(url, requester, resolve);
    });
  }

  /**
   * Timeouts are necessary for large xml trees. This will cancel the call if the request is taking
   * too long, but will still allow the promises to resolve.
   *
   * @private
   * @param {string} url - url to use as a hash in the timeoutTable
   * @param {Promise} requester - the promise that creates the web request to the url
   * @param {Function} callback - the resolve method is used here to resolve the parent promise
   */
  initializeTimeout(url, requester, callback) {
    // this resolves instead of rejects in order to allow other requests to continue
    this.timeoutTable[url] = setTimeout(() => {
      requester.cancel();
      callback({
        error: `request timed out after ${this.timeout} milliseconds`,
        data: {}
      });
    }, this.timeout);
  }

  /**
   * Recursive function that will go through a sitemaps tree and get all the sites
   *
   * Never rejects: any failure yields an empty list so sibling sitemaps keep crawling.
   *
   * @private
   * @recursive
   * @param {string} url - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
   * @returns {Promise<SitesArray>}
   */
  crawl(url) {
    return this.parse(url)
      .then(({ error, data }) => {
        // The promise resolved, remove the timeout
        clearTimeout(this.timeoutTable[url]);

        if (error) {
          // Fail silently
          return [];
        }

        if (data && data.urlset && data.urlset.url) {
          return data.urlset.url.map((site) => site.loc && site.loc[0]);
        }

        if (data && data.sitemapindex && Array.isArray(data.sitemapindex.sitemap)) {
          // Entries without a <loc> are skipped: crawling `undefined` would fall back to
          // `this.url` and could re-crawl the index forever.
          const childUrls = data.sitemapindex.sitemap
            .map((entry) => entry.loc && entry.loc[0])
            .filter(Boolean);

          return Promise.all(childUrls.map((childUrl) => this.crawl(childUrl)))
            .then((results) => results.reduce((all, sites) => all.concat(sites), []));
        }

        // Fail silently
        return [];
      })
      // Unexpected document shapes are treated like any other failure (empty result)
      // instead of leaving the caller waiting on a promise that never settles.
      .catch(() => []);
  }

  /**
   * Gets the sites from a sitemap.xml with a given URL
   *
   * The callback is invoked once the fetch has settled. On success the error argument
   * is an empty object (kept for backward compatibility), not `null`.
   *
   * @deprecated
   * @param {string} [url] - url to query
   * @param {getSitesCallback} callback - callback for sites and error
   * @returns {Promise} settles with whatever the callback returns
   * @callback
   */
  getSites(url = this.url, callback) {
    console.warn( // eslint-disable-line no-console
      '\r\nWarning:', 'function .getSites() is deprecated, please use the function .fetch()\r\n');

    // Two-argument then(): an exception thrown by the callback itself must not
    // trigger a second invocation through the rejection handler.
    return this.fetch(url).then(
      (response) => callback({}, response.sites),
      (error) => callback(error, [])
    );
  }
}
/** | ||
* Callback for the getSites method | ||
* | ||
* @callback getSitesCallback | ||
* @param {Object} error - error from callback | ||
* @param {Array} sites - an Array of sitemaps | ||
*/ | ||
/** | ||
* Timeout in milliseconds | ||
* | ||
* @typedef {Number} Timeout | ||
* the number of milliseconds before all requests timeout. The promises will still resolve so | ||
* you'll still receive parts of the request, but maybe not all urls | ||
* default is 15000 which is 15 seconds | ||
*/ | ||
/** | ||
* Resolve handler type for the promise in this.parse() | ||
* | ||
* @typedef {Object} ParseData | ||
* | ||
* @property {Error} error that either comes from `xmlParse` or `request` or custom error | ||
* @property {Object} data | ||
* @property {string} data.url - URL of sitemap | ||
* @property {Array} data.urlset - Array of returned URLs | ||
* @property {string} data.urlset.url - single Url | ||
* @property {Object} data.sitemapindex - index of sitemap | ||
* @property {string} data.sitemapindex.sitemap - Sitemap | ||
* @example {
*   error: "There was an error!",
*   data: {
*     url: 'linkedin.com',
*     urlset: [{
*       url: 'www.linkedin.com/project1'
*     }, {
*       url: 'www.linkedin.com/project2'
*     }]
*   }
* }
*/
/** | ||
* Resolve handler type for the promise in this.fetch()
* | ||
* @typedef {Object} SitesData | ||
* | ||
* @property {string} url - the original url used to query the data | ||
* @property {SitesArray} sites | ||
* @example { | ||
* url: 'linkedin.com/sitemap.xml', | ||
* sites: [ | ||
* 'linkedin.com/project1', | ||
* 'linkedin.com/project2' | ||
* ] | ||
* } | ||
*/ | ||
/** | ||
* An array of urls | ||
* | ||
* @typedef {String[]} SitesArray | ||
* @example [ | ||
* 'www.google.com', | ||
* 'www.linkedin.com' | ||
* ] | ||
*/ | ||
exports["default"] = Sitemapper; | ||
module.exports = exports.default; | ||
module.exports.default = exports.default; | ||
"use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.default = void 0;

var _xml2jsEs6Promise = _interopRequireDefault(require("xml2js-es6-promise"));
var _got = _interopRequireDefault(require("got"));

/**
 * Normalizes a required module so its default export can always be read from `.default`.
 *
 * @param {*} obj - the value returned by `require`
 * @returns {Object} `obj` when flagged as an ES module, otherwise `{ default: obj }`
 */
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

/**
 * Crawls an XML sitemap (following nested sitemap indexes) and collects the page URLs it lists.
 *
 * `url`, `timeout`, `requestHeaders` and `debug` are plain writable instance properties.
 * The static `timeout`/`url`/`debug` accessors this class used to declare read and wrote
 * themselves, so touching them overflowed the stack; they are intentionally gone.
 */
class Sitemapper {
  /**
   * @param {Object} [options] - options to set
   * @param {string} [options.url] - the sitemap url
   * @param {number} [options.timeout] - milliseconds before a request is abandoned;
   *   falls back to 15000 when omitted or falsy
   * @param {Object} [options.requestHeaders] - headers sent with every sitemap request
   * @param {boolean} [options.debug] - when truthy, progress and failures are logged to the console
   */
  constructor(options) {
    const settings = options || { requestHeaders: {} };
    this.url = settings.url;
    this.timeout = settings.timeout || 15000;
    // Pending timeout handles, keyed by the url being requested.
    this.timeoutTable = {};
    this.requestHeaders = settings.requestHeaders;
    this.debug = settings.debug;
  }

  /**
   * Gets the sites from a sitemap with a given URL.
   *
   * @param {string} [url] - sitemap url; defaults to `this.url`
   * @returns {Promise<{url: string, sites: Array}>}
   */
  fetch(url = this.url) {
    return this.crawl(url).then((sites) => ({ url, sites }));
  }

  /**
   * Requests the URL and parses the XML body.
   *
   * The returned promise always resolves; failures are reported through the `error`
   * field of the resolved value rather than by rejecting.
   *
   * @param {string} [url] - sitemap url; defaults to `this.url`
   * @returns {Promise<{error: *, data: *}>}
   */
  parse(url = this.url) {
    const requestOptions = {
      method: "GET",
      resolveWithFullResponse: true,
      gzip: true,
      headers: this.requestHeaders
    };
    const request = _got.default;
    const parseXml = _xml2jsEs6Promise.default;

    // An explicit promise is needed because the timeout must be able to settle the
    // result before the request chain does; whichever calls `resolve` first wins.
    return new Promise((resolve) => {
      const requester = request(url, requestOptions);

      requester
        .then((response) => {
          if (!response || response.statusCode !== 200) {
            clearTimeout(this.timeoutTable[url]);
            // `response` may be missing entirely, so guard the property read.
            return { error: response && response.error, data: response };
          }
          return parseXml(response.body).then((data) => ({ error: null, data }));
        })
        .catch((failure) => ({
          // Request and XML errors rarely carry an `.error` field; report the failure
          // itself so crawl() takes its error branch instead of "unknown state".
          error: failure && failure.error ? failure.error : failure,
          data: failure
        }))
        .then(resolve);

      this.initializeTimeout(url, requester, resolve);
    });
  }

  /**
   * Arms a timer that cancels the request and resolves the parent promise with a
   * timeout error, so other requests can continue.
   *
   * @param {string} url - url used as the key in `timeoutTable`
   * @param {Promise} requester - the in-flight request; must expose `cancel()`
   * @param {Function} callback - resolve function of the parent promise
   */
  initializeTimeout(url, requester, callback) {
    this.timeoutTable[url] = setTimeout(() => {
      requester.cancel();
      if (this.debug) {
        console.debug("crawl timed out");
      }
      callback({
        error: `request timed out after ${this.timeout} milliseconds for url: '${url}'`,
        data: {}
      });
    }, this.timeout);
  }

  /**
   * Recursively walks a sitemap tree and collects every listed URL.
   *
   * Never rejects: any failure yields an empty list so sibling sitemaps keep crawling.
   *
   * @param {string} url - sitemap url
   * @returns {Promise<Array>}
   */
  crawl(url) {
    return this.parse(url)
      .then(({ error, data }) => {
        // The request settled, so its timeout is no longer needed.
        clearTimeout(this.timeoutTable[url]);

        if (error) {
          if (this.debug) {
            console.error(`Error occurred during "crawl('${url}')":\n\r Error: ${error}`);
          }
          return [];
        }

        if (data && data.urlset && data.urlset.url) {
          if (this.debug) {
            console.debug(`Urlset found during "crawl('${url}')"`);
          }
          return data.urlset.url.map((site) => site.loc && site.loc[0]);
        }

        if (data && data.sitemapindex && Array.isArray(data.sitemapindex.sitemap)) {
          if (this.debug) {
            console.debug(`Additional sitemap found during "crawl('${url}')"`);
          }
          // Entries without a <loc> are skipped: crawling `undefined` would fall back to
          // `this.url` and could re-crawl the index forever.
          const childUrls = data.sitemapindex.sitemap
            .map((entry) => entry.loc && entry.loc[0])
            .filter(Boolean);

          return Promise.all(childUrls.map((childUrl) => this.crawl(childUrl)))
            .then((results) => results.reduce((all, sites) => all.concat(sites), []));
        }

        if (this.debug) {
          console.error(`Unknown state during "crawl(${url})":`, error, data);
        }
        return [];
      })
      .catch((failure) => {
        // Unexpected document shapes are treated like any other failure (empty result)
        // instead of leaving the caller waiting on a promise that never settles.
        if (this.debug) {
          console.error(`Unexpected failure during "crawl('${url}')":`, failure);
        }
        return [];
      });
  }

  /**
   * Callback-style wrapper around fetch().
   *
   * The callback is invoked once the fetch has settled. On success the error argument
   * is an empty object (kept for backward compatibility), not `null`.
   *
   * @deprecated use fetch()
   * @param {string} [url] - sitemap url; defaults to `this.url`
   * @param {Function} callback - called as `callback(error, sites)`
   * @returns {Promise} settles with whatever the callback returns
   */
  getSites(url = this.url, callback) {
    console.warn("\r\nWarning:", "function .getSites() is deprecated, please use the function .fetch()\r\n");

    // Two-argument then(): an exception thrown by the callback itself must not
    // trigger a second invocation through the rejection handler.
    return this.fetch(url).then(
      (response) => callback({}, response.sites),
      (error) => callback(error, [])
    );
  }
}

exports.default = Sitemapper;
module.exports = exports.default;
module.exports.default = exports.default;
//# sourceMappingURL=sitemapper.js.map |
@@ -1,18 +0,2 @@ | ||
"use strict"; | ||
var _sitemapper = _interopRequireDefault(require("../assets/sitemapper.js")); | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } | ||
// Example: crawl Google's "work" sitemap and print every URL it lists.
var googleOptions = {
  url: 'https://www.google.com/work/sitemap.xml',
  timeout: 15000 // 15 seconds
};
var Google = new _sitemapper["default"](googleOptions);

// Success prints only the collected sites; any rejection is printed as-is.
var printSites = function (data) {
  return console.log(data.sites); // eslint-disable-line no-console
};
var printError = function (error) {
  return console.log(error); // eslint-disable-line no-console
};

Google.fetch().then(printSites)["catch"](printError);
"use strict";

var _sitemapper = _interopRequireDefault(require("../assets/sitemapper.js"));

/**
 * Normalizes a required module so its default export can always be read from `.default`.
 *
 * @param {*} mod - the value returned by `require`
 * @returns {Object} `mod` when flagged as an ES module, otherwise `{ default: mod }`
 */
function _interopRequireDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}

// Example: crawl Google's "work" sitemap (debug logging off, 15 second timeout)
// and print every URL it lists; any rejection is printed as-is.
var Google = new _sitemapper.default({
  url: "https://www.google.com/work/sitemap.xml",
  debug: false,
  timeout: 15000
});

Google.fetch()
  .then((data) => console.log(data.sites))
  .catch((error) => console.log(error));
//# sourceMappingURL=google.js.map |
@@ -1,36 +0,2 @@ | ||
"use strict"; | ||
var _sitemapper = _interopRequireDefault(require("../assets/sitemapper.js")); | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } | ||
// Example: crawl several public sitemaps and print what each one yields.
var SitemapperClass = _sitemapper["default"];

// Shared handler: print whatever value (result or error) it is given.
var logValue = function (value) {
  return console.log(value);
};

var sitemapper = new SitemapperClass();
var Google = new SitemapperClass({
  url: 'https://www.google.com/work/sitemap.xml',
  timeout: 15000 // 15 seconds
});

Google.fetch()
  .then(function (data) {
    return console.log(data.sites);
  })["catch"](logValue);

// The shared instance was built without options, so shorten its timeout before use.
sitemapper.timeout = 5000;

sitemapper.fetch('https://wp.seantburke.com/sitemap.xml')
  .then(function (result) {
    return console.log("url:".concat(result.url), 'sites:', result.sites);
  })["catch"](logValue);

// These two only differ by url: print the whole result object for each.
[
  'http://www.cnn.com/sitemaps/sitemap-index.xml',
  'http://www.stubhub.com/new-sitemap/us/sitemap-US-en-index.xml'
].forEach(function (sitemapUrl) {
  sitemapper.fetch(sitemapUrl).then(logValue)["catch"](logValue);
});
"use strict";

var _sitemapper = _interopRequireDefault(require("../assets/sitemapper"));

/**
 * Normalizes a required module so its default export can always be read from `.default`.
 *
 * @param {*} mod - the value returned by `require`
 * @returns {Object} `mod` when flagged as an ES module, otherwise `{ default: mod }`
 */
function _interopRequireDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}

/**
 * Advances a generator by one step on behalf of the promise returned by _asyncToGenerator.
 *
 * @param {Generator} gen - the running generator
 * @param {Function} resolve - settles the outer promise when the generator finishes
 * @param {Function} reject - rejects the outer promise when the generator throws
 * @param {Function} _next - resumes the generator with a fulfilled value
 * @param {Function} _throw - resumes the generator by throwing a rejection into it
 * @param {string} key - generator method to call ("next" or "throw")
 * @param {*} arg - value passed to that method
 */
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) {
  var step;
  var value;
  try {
    step = gen[key](arg);
    value = step.value;
  } catch (error) {
    reject(error);
    return;
  }

  if (step.done) {
    resolve(value);
  } else {
    // Wait for the yielded value, then feed the outcome back into the generator.
    Promise.resolve(value).then(_next, _throw);
  }
}

/**
 * Wraps a generator function so that each `yield` behaves like `await`.
 *
 * @param {GeneratorFunction} fn - generator function to drive
 * @returns {Function} function returning a promise for the generator's return value
 */
function _asyncToGenerator(fn) {
  return function () {
    var self = this;
    var args = arguments;
    return new Promise(function (resolve, reject) {
      var gen = fn.apply(self, args);

      function _next(value) {
        asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value);
      }

      function _throw(error) {
        asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", error);
      }

      _next(undefined);
    });
  };
}

// Example: fetch one sitemap with debug logging on and a 10 second timeout,
// printing either the result or the failure.
var exampleURL = "https://www.walmart.com/sitemap_topic.xml";
var sitemapper = new _sitemapper.default({
  url: exampleURL,
  debug: true,
  timeout: 10000
});

_asyncToGenerator(function* () {
  try {
    var result = yield sitemapper.fetch();
    console.log(result);
  } catch (error) {
    console.error(error);
  }
})();
//# sourceMappingURL=index.js.map |
{ | ||
"name": "sitemapper", | ||
"version": "3.1.7", | ||
"version": "3.1.8", | ||
"description": "Parser for XML Sitemaps to be used with Robots.txt and web crawlers", | ||
@@ -32,3 +32,3 @@ "keywords": [ | ||
"name": "Sean Thomas Burke", | ||
"email": "sean@seantburke.com", | ||
"email": "seantomburke@users.noreply.github.com", | ||
"url": "http://www.seantburke.com" | ||
@@ -39,3 +39,3 @@ }, | ||
"build": "npm run clean && npm run compile", | ||
"start": "node lib/examples/index.js", | ||
"start": "npm run build && node lib/examples/index.js", | ||
"test": "npm run build && mocha ./lib/tests/*.js && npm run lint", | ||
@@ -49,3 +49,3 @@ "lint": "eslint src", | ||
"name": "Sean Thomas Burke", | ||
"email": "sean@seantburke.com", | ||
"email": "seantomburke@users.noreply.github.com", | ||
"url": "http://www.seantburke.com" | ||
@@ -62,17 +62,20 @@ } | ||
"devDependencies": { | ||
"@babel/cli": "^7.12.1", | ||
"@babel/core": "^7.7.7", | ||
"@babel/preset-env": "^7.7.7", | ||
"@types/async": "^3.0.3", | ||
"@babel/cli": "^7.12.8", | ||
"@babel/core": "^7.12.9", | ||
"@babel/preset-env": "^7.12.7", | ||
"@babel/runtime": "^7.12.5", | ||
"@types/async": "^3.2.4", | ||
"@types/got": "^9.6.11", | ||
"@types/is-url": "^1.2.28", | ||
"@types/mocha": "^5.2.7", | ||
"async": "^3.1.0", | ||
"babel-plugin-add-module-exports": "^1.0.2", | ||
"documentation": "^12.1.4", | ||
"eslint": "^4.18.2", | ||
"is-url": "^1.2.2", | ||
"mocha": "^5.2.0", | ||
"@types/mocha": "^8.0.4", | ||
"async": "^3.2.0", | ||
"babel-plugin-add-module-exports": "^1.0.4", | ||
"babel-preset-minify": "^0.5.1", | ||
"documentation": "^13.1.0", | ||
"eslint": "^7.14.0", | ||
"is-url": "^1.2.4", | ||
"mocha": "^8.2.1", | ||
"should": "^13.2.3", | ||
"ts-node": "^8.6.1", | ||
"typescript": "^3.9.7" | ||
"ts-node": "^9.0.0", | ||
"typescript": "^4.1.2" | ||
}, | ||
@@ -79,0 +82,0 @@ "dependencies": { |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Minified code
Quality: This package contains minified code. This may be harmless in some cases where minified code is included in packaged libraries; however, packages on npm should not minify code.
Found 3 instances in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
11443
18
37
3
1