
supercrawler - npm package version comparison

Comparing version 0.16.0 to 0.16.1

4 files changed

lib/Crawler.js

@@ -414,4 +414,4 @@ var Crawler,
-      // if robots returns a 404, we assume there are no restrictions.
-      if (robotsStatusCode === 404) {
+      // if robots returns a 404 or 410, we assume there are no restrictions.
+      if (robotsStatusCode === 404 || robotsStatusCode === 410) {
         return Promise.resolve({
           statusCode: 200,
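For orientation, here is a minimal sketch of the idea behind this branch, not supercrawler's verbatim source: a missing robots.txt is normalized into a permissive result. The function name normalizeRobotsResponse and the empty body are assumptions; the resolved { statusCode: 200 } shape follows the context lines above.

// Hypothetical helper illustrating the branch above: a missing robots.txt
// (404 Not Found, and now also 410 Gone) is treated as an empty, fully
// permissive file, so crawling proceeds without restrictions.
function normalizeRobotsResponse(robotsStatusCode, body) {
  if (robotsStatusCode === 404 || robotsStatusCode === 410) {
    return Promise.resolve({
      statusCode: 200,
      body: "" // assumption: an empty robots.txt allows everything
    });
  }

  // Any other status is passed through for the caller to interpret.
  return Promise.resolve({
    statusCode: robotsStatusCode,
    body: body
  });
}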

package.json

 {
   "name": "supercrawler",
   "description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.",
-  "version": "0.16.0",
+  "version": "0.16.1",
   "homepage": "https://github.com/brendonboshell/supercrawler",
   "author": "Brendon Boshell <brendonboshell@gmail.com>",
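The description above sums up the crawler's contract. As a quick orientation, a minimal usage sketch built only from the API visible elsewhere in this diff (a Crawler constructor taking an interval option, plus start() and stop()); the export shape require("supercrawler").Crawler and the millisecond unit are assumptions.

var Crawler = require("supercrawler").Crawler; // assumed export shape

// Interval between queue polls (presumably milliseconds); robots.txt,
// rate limits and concurrency limits are handled by the crawler itself,
// per the package description above.
var crawler = new Crawler({
  interval: 1000
});

crawler.start();

// Stop after ten seconds for a bounded demo run.
setTimeout(function () {
  crawler.stop();
}, 10000);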

README.md

@@ -350,2 +350,6 @@ # Node.js Web Crawler
+### 0.16.1
+
+* [Fixed] Treats 410 the same as 404 for robots.txt requests.
+
 ### 0.16.0

@@ -450,2 +450,17 @@ var proxyquire = require('proxyquire'),
+  it("crawls all pages if robots.txt is 410", function (done) {
+    var crawler = new Crawler({
+      interval: 10
+    });
+
+    crawler.start();
+    robotsStatusCode = 410;
+
+    setTimeout(function () {
+      crawler.stop();
+      expect(numCrawlsOfUrl("https://example.com/index17.html", false)).to.equal(1);
+      done();
+    }, 200);
+  });
+
   it("excludes all pages if robots.txt could not be crawled", function (done) {
     var crawler = new Crawler({
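The hunk's context line shows the suite loads the module under test through proxyquire, which is how a shared robotsStatusCode variable can steer the stubbed HTTP layer between tests. A hedged sketch of that wiring follows; the module path and the request-style stub are assumptions, not the project's actual test setup.

var proxyquire = require("proxyquire");

var robotsStatusCode = 200; // individual tests reassign this, as above

// Replace the HTTP dependency so every robots.txt fetch answers with
// whatever status code the current test has selected.
var Crawler = proxyquire("../lib/Crawler", {
  "request": function (opts, callback) {
    callback(null, { statusCode: robotsStatusCode }, "");
  }
});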
