supercrawler - npm Package Compare versions

supercrawler

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Comparing version 1.6.0 to 1.7.0

lib/Crawler.js

		@@ -344,2 +344,4 @@ var Crawler,
		match = true;
		} else if (Array.isArray(handlerContentType) && (handlerContentType).indexOf(contentType) > -1) {
		match = true;
		} else if ((contentType + "/").indexOf(handlerContentType + "/") === 0) {
		@@ -346,0 +348,0 @@ match = true;

package.json

		{
		"name": "supercrawler",
		"description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.",
		"version": "1.6.0",
		"version": "1.7.0",
		"homepage": "https://github.com/brendonboshell/supercrawler",
		@@ -6,0 +6,0 @@ "author": "Brendon Boshell <brendonboshell@gmail.com>",

README.md

		@@ -102,2 +102,5 @@ # Node.js Web Crawler

		// Match an array of content types
		crawler.addHandler(["text/plain", "text/html"], myCustomHandler);

		// Custom content handler for HTML pages.
		@@ -158,3 +161,3 @@ crawler.addHandler("text/html", function (context) {
		\| addHandler(handler) \| Add a handler for all content types. \|
		\| addHandler(contentType, handler) \| Add a handler for a specific content type. \|
		\| addHandler(contentType, handler) \| Add a handler for a specific content type. If `contentType` is a string, then (for example) 'text' will match 'text/html', 'text/plain', etc. If `contentType` is an array of strings, the page content type must match exactly. \|

		@@ -369,2 +372,6 @@ The `Crawler` object fires the following events:

		### 1.7.0

		* [Changed] `Crawler#addHandler` can now take an array of content types to match, thanks [taina0407](https://github.com/taina0407).

		### 1.6.0
		@@ -371,0 +378,0 @@

test/Crawler.spec.js

		@@ -804,2 +804,40 @@ var proxyquire = require('proxyquire'),

		// Verifies that a handler registered with an ARRAY of content types is
		// invoked when the page's content type is one of the array entries.
		it("fires for a array content type", function (done) {
		  var crawler = new Crawler({
		    interval: 100
		  });

		  // `pageContentType` is defined outside this test; presumably it sets the
		  // content type reported by the mocked response -- confirm against the
		  // suite's setup code.
		  pageContentType = "text/html";
		  crawler.addHandler(["text/plain", "text/html"], handler);
		  crawler.start();

		  // Let the crawler run for 200 ms before stopping and inspecting the spy.
		  setTimeout(function () {
		    crawler.stop();
		    // Buffer.from replaces the deprecated `new Buffer(string)` constructor;
		    // it produces the same bytes for a string argument.
		    expect(handler.calledWith(sinon.match({
		      body: sinon.match(Buffer.from("<html><body>test</body></html>")),
		      url: "https://example.com/index1.html"
		    }))).to.equal(true);
		    done();
		  }, 200);
		});

		// Verifies that a handler registered with an ARRAY of content types is NOT
		// invoked with the page when the page's content type is absent from the
		// array ("text/xml" is not in ["text/plain", "text/html"]).
		it("can hold fire for a array content type", function (done) {
		  var crawler = new Crawler({
		    interval: 100
		  });

		  // `pageContentType` is defined outside this test; presumably it sets the
		  // content type reported by the mocked response -- confirm against the
		  // suite's setup code.
		  pageContentType = "text/xml";
		  crawler.addHandler(["text/plain", "text/html"], handler);
		  crawler.start();

		  // Let the crawler run for 200 ms before stopping and inspecting the spy.
		  setTimeout(function () {
		    crawler.stop();
		    // Buffer.from replaces the deprecated `new Buffer(string)` constructor;
		    // it produces the same bytes for a string argument.
		    expect(handler.calledWith(sinon.match({
		      body: sinon.match(Buffer.from("<html><body>test</body></html>")),
		      url: "https://example.com/index1.html"
		    }))).to.equal(false);
		    done();
		  }, 200);
		});

		it("can fire when content type determined from extension", function (done) {
		@@ -806,0 +844,0 @@ var crawler = new Crawler({

Improved metrics