Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign inDemoInstall
Socket

supercrawler

Package Overview
Dependencies
Maintainers
1
Versions
45
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

supercrawler - npm Package Compare versions

Comparing version 0.7.0 to 0.8.0

15

lib/handlers/sitemapsParser.js

@@ -28,2 +28,5 @@ var cheerio = require("cheerio"),

return xmlBufProm.then(function (xmlBuf) {
var locUrls,
linkUrls;
var $ = cheerio.load(xmlBuf, {

@@ -33,11 +36,13 @@ xmlMode: true

return $("sitemapindex > sitemap > loc, urlset > url > loc").map(function () {
var $this;
locUrls = $("sitemapindex > sitemap > loc, urlset > url > loc").map(function () {
return $(this).text();
}).get();
$this = $(this);
linkUrls = $("urlset > url > xhtml\\:link[href][rel=alternate]").map(function () {
return $(this).attr("href");
}).get();
return $this.text();
}).get();
return locUrls.concat(linkUrls);
});
};
};
{
"name": "supercrawler",
"description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.",
"version": "0.7.0",
"version": "0.8.0",
"homepage": "https://github.com/brendonboshell/supercrawler",

@@ -6,0 +6,0 @@ "author": "Brendon Boshell <brendonboshell@gmail.com>",

@@ -304,2 +304,7 @@ # Node.js Web Crawler

### 0.8.0
* [Changed] Sitemaps parser now extracts `<xhtml:link rel="alternate">` URLs,
in addition to the `<loc>` URLs.
### 0.7.0

@@ -309,3 +314,3 @@

a large list of URLs into the crawl queue.
* [Updated] `DbUrlList` supports the bulk insert method.
* [Changed] `DbUrlList` supports the bulk insert method.

@@ -312,0 +317,0 @@ ### 0.6.1

@@ -61,2 +61,22 @@ var sitemapsParser = require("../../lib/handlers/sitemapsParser"),

// Verifies that the sitemaps parser returns the href of an
// <xhtml:link rel="alternate"> element in addition to the <loc> URL of the
// same <url> entry, with the <loc> URLs listed first.
it("discovers an alternate link", function (done) {
  // Minimal urlset with one <url> that carries both a <loc> and an
  // alternate-language link. (The fixture previously ended with a stray "]"
  // after the closing tag, which made the document malformed.)
  urlset = [
    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
    "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" >",
    "<url>",
    "<loc>https://example.com/home.html</loc>",
    "<xhtml:link rel=\"alternate\" hreflang=\"de\" href=\"https://example.com/home-de.html\" />",
    "</url>",
    "</urlset>"
  ].join("\n");

  sp(new Buffer(urlset), "http://example.com/sitemap_index.xml").then(function (urls) {
    expect(urls).to.deep.equal([
      "https://example.com/home.html",
      "https://example.com/home-de.html"
    ]);
    done();
  }).catch(done); // Surface assertion failures/rejections instead of timing out.
});
it("supports a .gz sitemap file", function (done) {

@@ -63,0 +83,0 @@ Promise.promisify(zlib.gzip)(new Buffer(urlset)).then(function (buf) {

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc