Socket
Socket
Sign in · Demo · Install

supercrawler

Package Overview
Dependencies
179
Maintainers
1
Versions
45
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.6.0 to 1.7.0

2

lib/Crawler.js

@@ -344,2 +344,4 @@ var Crawler,

match = true;
} else if (Array.isArray(handlerContentType) && (handlerContentType).indexOf(contentType) > -1) {
match = true;
} else if ((contentType + "/").indexOf(handlerContentType + "/") === 0) {

@@ -346,0 +348,0 @@ match = true;

2

package.json
{
"name": "supercrawler",
"description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.",
"version": "1.6.0",
"version": "1.7.0",
"homepage": "https://github.com/brendonboshell/supercrawler",

@@ -6,0 +6,0 @@ "author": "Brendon Boshell <brendonboshell@gmail.com>",

@@ -102,2 +102,5 @@ # Node.js Web Crawler

// Match any one of several content types by passing an array
crawler.addHandler(["text/plain", "text/html"], myCustomHandler);
// Custom content handler for HTML pages.

@@ -158,3 +161,3 @@ crawler.addHandler("text/html", function (context) {

| addHandler(handler) | Add a handler for all content types. |
| addHandler(contentType, handler) | Add a handler for a specific content type. |
| addHandler(contentType, handler) | Add a handler for a specific content type. If `contentType` is a string, then (for example) 'text' will match 'text/html', 'text/plain', etc. If `contentType` is an array of strings, the page content type must match exactly. |

@@ -369,2 +372,6 @@ The `Crawler` object fires the following events:

### 1.7.0
* [Changed] `Crawler#addHandler` can now take an array of content types to match. Thanks to [taina0407](https://github.com/taina0407).
### 1.6.0

@@ -371,0 +378,0 @@

@@ -804,2 +804,40 @@ var proxyquire = require('proxyquire'),

// Checks that a handler registered with an array of content types is
// called when the page's content type is one of the entries in that array.
it("fires for a array content type", function (done) {
  var crawler = new Crawler({
    interval: 100
  });

  // pageContentType is declared outside this test; presumably the mocked
  // HTTP response reports it as the Content-Type -- confirm in the fixture.
  pageContentType = "text/html";
  crawler.addHandler(["text/plain", "text/html"], handler);
  crawler.start();

  // Wait 200 ms (twice the interval configured above) before asserting.
  setTimeout(function () {
    crawler.stop();
    expect(handler.calledWith(sinon.match({
      // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
      body: sinon.match(Buffer.from("<html><body>test</body></html>")),
      url: "https://example.com/index1.html"
    }))).to.equal(true);
    done();
  }, 200);
});
// Checks that a handler registered with an array of content types is NOT
// called with the page when the page's content type is absent from the array.
it("can hold fire for a array content type", function (done) {
  var crawler = new Crawler({
    interval: 100
  });

  // "text/xml" is deliberately not in the handler's array below.
  // pageContentType is declared outside this test; presumably the mocked
  // HTTP response reports it as the Content-Type -- confirm in the fixture.
  pageContentType = "text/xml";
  crawler.addHandler(["text/plain", "text/html"], handler);
  crawler.start();

  // Wait 200 ms (twice the interval configured above) before asserting.
  setTimeout(function () {
    crawler.stop();
    expect(handler.calledWith(sinon.match({
      // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
      body: sinon.match(Buffer.from("<html><body>test</body></html>")),
      url: "https://example.com/index1.html"
    }))).to.equal(false);
    done();
  }, 200);
});
it("can fire when content type determined from extension", function (done) {

@@ -806,0 +844,0 @@ var crawler = new Crawler({

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc