supercrawler
Advanced tools
Comparing version 0.2.1 to 0.2.2
var DbUrlList, | ||
Promise = require("bluebird"), | ||
Url = require("./Url"), | ||
Sequelize = require('sequelize'); | ||
Sequelize = require('sequelize'), | ||
crypto = require("crypto"); | ||
@@ -33,3 +34,3 @@ /** | ||
this._urlTable = this._db.define('url', { | ||
url: { | ||
urlHash: { | ||
type: Sequelize.STRING, | ||
@@ -39,2 +40,6 @@ allowNull: false, | ||
}, | ||
url: { | ||
type: Sequelize.STRING(10000), | ||
allowNull: false | ||
}, | ||
statusCode: { | ||
@@ -89,6 +94,10 @@ type: Sequelize.STRING, | ||
DbUrlList.prototype.insertIfNotExists = function (url) { | ||
var self = this; | ||
var self = this, | ||
urlHash; | ||
urlHash = crypto.createHash('sha1').update(url.getUrl()).digest("hex"); | ||
return this._getUrlTable().then(function (urlTable) { | ||
return urlTable.create({ | ||
urlHash: urlHash, | ||
url: url.getUrl(), | ||
@@ -136,8 +145,11 @@ statusCode: url.getStatusCode(), | ||
DbUrlList.prototype.upsert = function (url) { | ||
var self = this; | ||
var self = this, | ||
urlHash; | ||
urlHash = crypto.createHash('sha1').update(url.getUrl()).digest("hex"); | ||
return this._getUrlTable().then(function (urlTable) { | ||
return urlTable.findOne({ | ||
where: { | ||
url: url.getUrl() | ||
urlHash: urlHash | ||
} | ||
@@ -152,2 +164,3 @@ }).then(function (record) { | ||
return urlTable.upsert({ | ||
urlHash: urlHash, | ||
url: url.getUrl(), | ||
@@ -154,0 +167,0 @@ statusCode: url.getStatusCode(), |
{ | ||
"name": "supercrawler", | ||
"description": "A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.", | ||
"version": "0.2.1", | ||
"version": "0.2.2", | ||
"homepage": "https://github.com/brendonboshell/supercrawler", | ||
@@ -6,0 +6,0 @@ "author": "Brendon Boshell <brendonboshell@gmail.com>", |
@@ -82,2 +82,5 @@ var expect = require("chai").expect, | ||
// Stand-in for the STRING type factory on the Sequelize mock: the result is
// the text "TEST_STRING" followed by the requested size (for example
// "TEST_STRING10000"), so an assertion can tell which size was asked for.
sequelizeMock.STRING = function (size) {
  var typeTag = "TEST_STRING";

  return typeTag + size;
};
sequelizeMock.prototype.define = defineSpy; | ||
@@ -112,6 +115,9 @@ sequelizeMock.prototype.UniqueConstraintError = function () { }; | ||
sinon.assert.calledWith(defineSpy, "url", sinon.match({ | ||
url: { | ||
urlHash: { | ||
allowNull: false, | ||
unique: true | ||
}, | ||
url: { | ||
allowNull: false | ||
}, | ||
statusCode: { | ||
@@ -132,2 +138,11 @@ allowNull: true | ||
// Checks that constructing a DbUrlList defines the "url" table with a `url`
// column whose type is the STRING factory's result for a size of 10000.
it("url field is max 10,000 characters long", function () {
  var expectedUrlColumn = { type: "TEST_STRING10000" };
  var expectedSchema = sinon.match({ url: expectedUrlColumn });

  new DbUrlList(opts);

  sinon.assert.calledWith(defineSpy, "url", expectedSchema);
});
describe("#insertIfNotExists", function () { | ||
@@ -155,2 +170,3 @@ it("creates the url table", function (done) { | ||
sinon.assert.calledWith(createSpy, sinon.match({ | ||
urlHash: "327c3fda87ce286848a574982ddd0b7c7487f816", | ||
url: "https://example.com", | ||
@@ -173,2 +189,3 @@ statusCode: 201, | ||
sinon.assert.calledWith(createSpy, sinon.match({ | ||
urlHash: "b559c7edd3fb67374c1a25e739cdd7edd1d79949", | ||
url: "https://example.com/", | ||
@@ -197,2 +214,3 @@ statusCode: 600, | ||
sinon.assert.calledWith(upsertSpy, sinon.match({ | ||
urlHash: "327c3fda87ce286848a574982ddd0b7c7487f816", | ||
url: "https://example.com", | ||
@@ -216,2 +234,3 @@ statusCode: 201, | ||
sinon.assert.calledWith(upsertSpy, sinon.match({ | ||
urlHash: "b559c7edd3fb67374c1a25e739cdd7edd1d79949", | ||
url: "https://example.com/", | ||
@@ -237,2 +256,3 @@ statusCode: 600, | ||
sinon.assert.calledWith(upsertSpy, sinon.match({ | ||
urlHash: "b559c7edd3fb67374c1a25e739cdd7edd1d79949", | ||
url: "https://example.com/", | ||
@@ -239,0 +259,0 @@ statusCode: 600, |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
70978
1744