sitemap-stream-parser
Advanced tools
Comparing version 1.3.0 to 1.4.0
21
index.js
@@ -21,3 +21,4 @@ // Generated by CoffeeScript 1.12.7 | ||
agentOptions = { | ||
keepAlive: true | ||
keepAlive: true, | ||
gzip: true | ||
}; | ||
@@ -39,5 +40,5 @@ | ||
SitemapParser.prototype._download = function(url, parserStream) { | ||
var unzip; | ||
if (url.lastIndexOf('gz') === url.length - 2) { | ||
SitemapParser.prototype._download = function(url, parserStream, done) { | ||
var stream, unzip; | ||
if (url.lastIndexOf('.gz') === url.length - 3) { | ||
unzip = zlib.createUnzip(); | ||
@@ -49,6 +50,12 @@ return request.get({ | ||
} else { | ||
return request.get({ | ||
stream = request.get({ | ||
url: url, | ||
gzip: true | ||
}).pipe(parserStream); | ||
}); | ||
stream.on('error', (function(_this) { | ||
return function(err) { | ||
return done(err); | ||
}; | ||
})(this)); | ||
return stream.pipe(parserStream); | ||
} | ||
@@ -105,3 +112,3 @@ }; | ||
})(this)); | ||
return this._download(url, parserStream); | ||
return this._download(url, parserStream, done); | ||
}; | ||
@@ -108,0 +115,0 @@ |
{ | ||
"name": "sitemap-stream-parser", | ||
"version": "1.3.0", | ||
"version": "1.4.0", | ||
"description": "Get a list of URLs from one or more sitemaps", | ||
@@ -22,6 +22,6 @@ "main": "index.js", | ||
"dependencies": { | ||
"async": "^1.5.0", | ||
"commander": "^2.11.0", | ||
"request": "^2.67.0", | ||
"sax": "^1.1.4" | ||
"async": "^2.6.1", | ||
"commander": "^2.15.1", | ||
"request": "^2.87.0", | ||
"sax": "^1.2.4" | ||
}, | ||
@@ -28,0 +28,0 @@ "repository": { |
# node-sitemap-stream-parser | ||
A streaming parser for sitemap files. It can handle gigabytes of deeply nested sitemaps, each with hundreds of URLs in them. Maximum memory usage is just over 100 MB at any time. | |
#Usage | ||
## Usage | ||
The main way to extract URLs for a site is the `parseSitemaps(urls, url_cb, done)` method. You can call it with either a single URL or an array of URLs. The `url_cb` callback is called for every URL that is found. The `done` callback is passed an error and/or a list of all the sitemaps that were checked. | |
Example: | ||
## Examples: | ||
@@ -38,7 +38,7 @@ ``` javascript | ||
sitemaps.sitemapsInRobots('http://example.com/robots.txt', function(err, urls) { | ||
if(urls.length > 0) { | ||
sitemaps.parseSitemaps(urls, console.log, function(err, sitemaps) { | ||
console.log(sitemaps); | ||
}); | ||
} | ||
if(err || !urls || urls.length == 0) | ||
return; | ||
sitemaps.parseSitemaps(urls, console.log, function(err, sitemaps) { | ||
console.log(sitemaps); | ||
}); | ||
}); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
8
186
31024
+ Added async@2.6.4 (transitive)
+ Added lodash@4.17.21 (transitive)
- Removed async@1.5.2 (transitive)
Updated async@^2.6.1
Updated commander@^2.15.1
Updated request@^2.87.0
Updated sax@^1.2.4