sitemap-stream-parser
Advanced tools
Comparing version
21
index.js
@@ -21,3 +21,4 @@ // Generated by CoffeeScript 1.12.7 | ||
agentOptions = { | ||
keepAlive: true | ||
keepAlive: true, | ||
gzip: true | ||
}; | ||
@@ -39,5 +40,5 @@ | ||
SitemapParser.prototype._download = function(url, parserStream) { | ||
var unzip; | ||
if (url.lastIndexOf('gz') === url.length - 2) { | ||
SitemapParser.prototype._download = function(url, parserStream, done) { | ||
var stream, unzip; | ||
if (url.lastIndexOf('.gz') === url.length - 3) { | ||
unzip = zlib.createUnzip(); | ||
@@ -49,6 +50,12 @@ return request.get({ | ||
} else { | ||
return request.get({ | ||
stream = request.get({ | ||
url: url, | ||
gzip: true | ||
}).pipe(parserStream); | ||
}); | ||
stream.on('error', (function(_this) { | ||
return function(err) { | ||
return done(err); | ||
}; | ||
})(this)); | ||
return stream.pipe(parserStream); | ||
} | ||
@@ -105,3 +112,3 @@ }; | ||
})(this)); | ||
return this._download(url, parserStream); | ||
return this._download(url, parserStream, done); | ||
}; | ||
@@ -108,0 +115,0 @@ |
{ | ||
"name": "sitemap-stream-parser", | ||
"version": "1.3.0", | ||
"version": "1.4.0", | ||
"description": "Get a list of URLs from one or more sitemaps", | ||
@@ -22,6 +22,6 @@ "main": "index.js", | ||
"dependencies": { | ||
"async": "^1.5.0", | ||
"commander": "^2.11.0", | ||
"request": "^2.67.0", | ||
"sax": "^1.1.4" | ||
"async": "^2.6.1", | ||
"commander": "^2.15.1", | ||
"request": "^2.87.0", | ||
"sax": "^1.2.4" | ||
}, | ||
@@ -28,0 +28,0 @@ "repository": { |
# node-sitemap-stream-parser | ||
A streaming parser for sitemap files. It is able to deal with GBs of deeply nested sitemaps with hundreds of URLs in them. Maximum memory usage is just over 100 MB at any time. | |
#Usage | ||
## Usage | ||
The main way to extract URLs for a site is the `parseSitemaps(urls, url_cb, done)` method. You can call it with either a single URL or an array of URLs. The `url_cb` callback is called for every URL that is found. The `done` callback is passed an error and/or a list of all the sitemaps that were checked. | |
Example: | ||
## Examples: | ||
@@ -38,7 +38,7 @@ ``` javascript | ||
sitemaps.sitemapsInRobots('http://example.com/robots.txt', function(err, urls) { | ||
if(urls.length > 0) { | ||
sitemaps.parseSitemaps(urls, console.log, function(err, sitemaps) { | ||
console.log(sitemaps); | ||
}); | ||
} | ||
if(err || !urls || urls.length == 0) | ||
return; | ||
sitemaps.parseSitemaps(urls, console.log, function(err, sitemaps) { | ||
console.log(sitemaps); | ||
}); | ||
}); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
8
14.29%186
8.14%31024
-1.7%+ Added
+ Added
- Removed
Updated
Updated
Updated
Updated