supercrawler
Advanced tools
Comparing version 1.6.0 to 1.7.0
@@ -344,2 +344,4 @@ var Crawler, | ||
match = true; | ||
} else if (Array.isArray(handlerContentType) && (handlerContentType).indexOf(contentType) > -1) { | ||
match = true; | ||
} else if ((contentType + "/").indexOf(handlerContentType + "/") === 0) { | ||
@@ -346,0 +348,0 @@ match = true; |
{ | ||
"name": "supercrawler", | ||
"description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.", | ||
"version": "1.6.0", | ||
"version": "1.7.0", | ||
"homepage": "https://github.com/brendonboshell/supercrawler", | ||
@@ -6,0 +6,0 @@ "author": "Brendon Boshell <brendonboshell@gmail.com>", |
@@ -102,2 +102,5 @@ # Node.js Web Crawler | ||
// Match an array of content-type | ||
crawler.addHandler(["text/plain", "text/html"], myCustomHandler); | ||
// Custom content handler for HTML pages. | ||
@@ -158,3 +161,3 @@ crawler.addHandler("text/html", function (context) { | ||
| addHandler(handler) | Add a handler for all content types. | | ||
| addHandler(contentType, handler) | Add a handler for a specific content type. | | ||
| addHandler(contentType, handler) | Add a handler for a specific content type. If `contentType` is a string, then (for example) 'text' will match 'text/html', 'text/plain', etc. If `contentType` is an array of strings, the page content type must exactly match one of the array's entries. | ||
@@ -369,2 +372,6 @@ The `Crawler` object fires the following events: | ||
### 1.7.0 | ||
* [Changed] `Crawler#addHandler` can now take an array of content types to match, thanks [taina0407](https://github.com/taina0407). | ||
### 1.6.0 | ||
@@ -371,0 +378,0 @@ |
@@ -804,2 +804,40 @@ var proxyquire = require('proxyquire'), | ||
// Positive case for array content-type handlers: the handler is registered
// for ["text/plain", "text/html"] and the page content type is set to
// "text/html" (one of the array entries), so the handler is expected to fire.
// NOTE(review): `Crawler`, `handler`, `pageContentType`, `sinon` and `expect`
// are defined outside this hunk — presumably fixtures of the enclosing test
// suite; confirm against the full spec file.
it("fires for a array content type", function (done) { | ||
var crawler = new Crawler({ | ||
interval: 100 | ||
}); | ||
pageContentType = "text/html"; | ||
crawler.addHandler(["text/plain", "text/html"], handler); | ||
crawler.start(); | ||
// Stop the crawler 200 ms after start and check that the handler was called
// with a context whose body and url match the expected page.
setTimeout(function () { | ||
crawler.stop(); | ||
expect(handler.calledWith(sinon.match({ | ||
// NOTE(review): `new Buffer(string)` is deprecated in current Node.js;
// `Buffer.from(string)` is the replacement if this test is ever updated.
body: sinon.match(new Buffer("<html><body>test</body></html>")), | ||
url: "https://example.com/index1.html" | ||
}))).to.equal(true); | ||
done(); | ||
}, 200); | ||
}); | ||
// Negative case for array content-type handlers: the handler is registered
// for ["text/plain", "text/html"] but the page content type is set to
// "text/xml", which is not in the array, so the handler must not be called
// with the page context.
// NOTE(review): `Crawler`, `handler`, `pageContentType`, `sinon` and `expect`
// are defined outside this hunk — presumably fixtures of the enclosing test
// suite; confirm against the full spec file.
it("can hold fire for a array content type", function (done) { | ||
var crawler = new Crawler({ | ||
interval: 100 | ||
}); | ||
pageContentType = "text/xml"; | ||
crawler.addHandler(["text/plain", "text/html"], handler); | ||
crawler.start(); | ||
// Stop the crawler 200 ms after start and assert that no handler call
// matched the page context (calledWith(...) is expected to be false).
setTimeout(function () { | ||
crawler.stop(); | ||
expect(handler.calledWith(sinon.match({ | ||
// NOTE(review): `new Buffer(string)` is deprecated in current Node.js;
// `Buffer.from(string)` is the replacement if this test is ever updated.
body: sinon.match(new Buffer("<html><body>test</body></html>")), | ||
url: "https://example.com/index1.html" | ||
}))).to.equal(false); | ||
done(); | ||
}, 200); | ||
}); | ||
it("can fire when content type determined from extension", function (done) { | ||
@@ -806,0 +844,0 @@ var crawler = new Crawler({ |
130791
3176
569