Comparing version 0.1.10 to 0.1.11
@@ -225,3 +225,3 @@ const extend = require('extend'); | ||
const queuable = this._createQueuable(request); | ||
return request.newQueue ? this.queues.push(queuable, request.newQueue) : this.queues.repush(request, queuable); | ||
return this.queues.repush(request, queuable); | ||
}); | ||
@@ -366,4 +366,6 @@ } | ||
} | ||
this._logStartEnd('processing', request, () => request.document = this.processor.process(request)); | ||
return Q(request); | ||
return this._logStartEnd('processing', request, () => { | ||
request.document = this.processor.process(request); | ||
return request; | ||
}); | ||
} | ||
@@ -377,4 +379,7 @@ | ||
return Q | ||
.try(work) | ||
.then(workResult => { result = workResult; return result; }) | ||
.try(() => { return work(); }) | ||
.then(workResult => { | ||
result = workResult; | ||
return result; | ||
}) | ||
.finally(() => { | ||
@@ -381,0 +386,0 @@ // in the getRequest case we did not have a request to start. Report on the one we found. |
@@ -39,3 +39,3 @@ const parse = require('parse-link-header'); | ||
const url = request.url + `?page=${i}&per_page=100`; | ||
const newRequest = new Request(request.type, url, { qualifier: request.context.qualifier, elementType: request.context.elementType }); | ||
const newRequest = new Request(request.type, url, request.context); | ||
newRequest.policy = request.policy; | ||
@@ -42,0 +42,0 @@ requests.push(newRequest); |
@@ -108,3 +108,3 @@ /** | ||
static reprocess() { | ||
static reprocessAndUpdate() { | ||
return new TraversalPolicy('originStorage', 'matchOrVersion', 'documentAndRelated', 'deepDeep'); | ||
@@ -145,4 +145,3 @@ } | ||
const transitivity = { shallow: 'shallow', deepShallow: 'deepShallow', deepDeep: 'deepShallow' }[this.transitivity]; | ||
const freshness = { shallow: this.freshness, deepShallow: this.freshness, deepDeep: 'always' }[this.transitivity]; | ||
return new TraversalPolicy(this.fetch, freshness, this.processing, transitivity); | ||
return new TraversalPolicy(this.fetch, this.freshness, this.processing, transitivity); | ||
} | ||
@@ -149,0 +148,0 @@ |
{ | ||
"name": "ghcrawler", | ||
"version": "0.1.10", | ||
"version": "0.1.11", | ||
"description": "A robust GitHub API crawler that walks a queue of GitHub entities retrieving and storing their contents.", | ||
@@ -5,0 +5,0 @@ "main": "./index.js", |
@@ -337,4 +337,3 @@ const assert = require('chai').assert; | ||
request => assert.fail(), | ||
error => expect(error.message.startsWith('Code: 500')).to.be.true | ||
); | ||
error => expect(error.message.startsWith('Code 500')).to.be.true); | ||
}); | ||
@@ -561,9 +560,7 @@ | ||
const crawler = createBaseCrawler({ queues: queues }); | ||
for (let i = 0; i < 5; i++) { | ||
const request = new Request('test', 'http://api.github.com/repo/microsoft/test'); | ||
request.markRequeue(); | ||
request._originQueue = normal; | ||
request.attemptCount = i === 0 ? null : i; | ||
crawler._requeue(request); | ||
expect(request.promises.length).to.be.equal(1); | ||
const request = new Request('test', 'http://api.github.com/repo/microsoft/test'); | ||
request.markRequeue(); | ||
request._originQueue = normal; | ||
return crawler._requeue(request).then(() => { | ||
// expect(request.promises.length).to.be.equal(1); | ||
expect(queue.length).to.be.equal(1); | ||
@@ -573,6 +570,4 @@ expect(queue[0] !== request).to.be.true; | ||
expect(queue[0].url === request.url).to.be.true; | ||
expect(queue[0].attemptCount).to.be.equal(i + 1); | ||
// pop the request to get ready for the next iteration | ||
queue.shift(); | ||
} | ||
expect(queue[0].attemptCount).to.be.equal(1); | ||
}); | ||
}); | ||
@@ -591,10 +586,10 @@ | ||
const crawler = createBaseCrawler({ queues: queues }); | ||
crawler._requeue(request); | ||
expect(request.promises.length).to.be.equal(1); | ||
expect(queue.length).to.be.equal(0); | ||
expect(deadletterQueue.length).to.be.equal(1); | ||
expect(deadletterQueue[0] !== request).to.be.true; | ||
expect(deadletterQueue[0].type === request.type).to.be.true; | ||
expect(deadletterQueue[0].url === request.url).to.be.true; | ||
expect(deadletterQueue[0].attemptCount).to.be.equal(6); | ||
return crawler._requeue(request).then(() => { | ||
expect(queue.length).to.be.equal(0); | ||
expect(deadletterQueue.length).to.be.equal(1); | ||
expect(deadletterQueue[0] !== request).to.be.true; | ||
expect(deadletterQueue[0].type === request.type).to.be.true; | ||
expect(deadletterQueue[0].url === request.url).to.be.true; | ||
expect(deadletterQueue[0].attemptCount).to.be.equal(6); | ||
}); | ||
}); | ||
@@ -707,8 +702,6 @@ }); | ||
it('still dequeues and unlocks if promises fail', () => { | ||
const done = []; | ||
const unlock = []; | ||
const normal = createBaseQueue('normal', { done: request => { done.push(request); return Q(); } }); | ||
it('requeues and unlocks if promises fail', () => { | ||
const normal = createBaseQueue('normal', { push: sinon.spy(() => { return Q(); }) }); | ||
const queues = createBaseQueues({ normal: normal }); | ||
const locker = createBaseLocker({ unlock: request => { unlock.push(request); return Q(); } }); | ||
const locker = createBaseLocker({ unlock: sinon.spy(() => { return Q(); }) }); | ||
const originalRequest = new Request('test', 'http://test.com'); | ||
@@ -722,6 +715,8 @@ originalRequest.lock = 42; | ||
error => { | ||
expect(done.length).to.be.equal(1); | ||
expect(done[0] === originalRequest).to.be.true; | ||
expect(unlock.length).to.be.equal(1); | ||
expect(unlock[0]).to.be.equal(42); | ||
expect(normal.push.callCount).to.be.equal(1); | ||
const requeued = normal.push.getCall(0).args[0]; | ||
expect(requeued.type).to.be.equal(originalRequest.type); | ||
expect(requeued.url).to.be.equal(originalRequest.url); | ||
expect(locker.unlock.callCount).to.be.equal(1); | ||
expect(locker.unlock.getCall(0).args[0]).to.be.equal(42); | ||
}); | ||
@@ -870,3 +865,3 @@ }); | ||
return Q.try(() => { | ||
crawler._processDocument(originalRequest) | ||
return crawler._processDocument(originalRequest) | ||
}).then( | ||
@@ -997,3 +992,3 @@ request => assert.fail(), | ||
return crawler._run(context).then(() => { | ||
expect(context.currentDelay).to.be.approximately(451, 4); | ||
expect(context.currentDelay).to.be.approximately(451, 10); | ||
}); | ||
@@ -1327,4 +1322,10 @@ }); | ||
logger: logger, | ||
ttl: 1000, | ||
weights: [1] | ||
weights: [1], | ||
parallelPush: 10, | ||
attenuation: { | ||
ttl: 1000 | ||
}, | ||
tracker: { | ||
ttl: 6 * 60 * 1000 | ||
} | ||
}, | ||
@@ -1342,5 +1343,9 @@ storage: { | ||
logger: logger, | ||
tokenLowerBound: 50, | ||
processingTtl: 60 * 1000, | ||
promiseTrace: false, | ||
orgList: [], | ||
fetcher: { | ||
tokenLowerBound: 50, | ||
forbiddenDelay: 120000 | ||
} | ||
}, | ||
@@ -1359,2 +1364,3 @@ requestor: { | ||
const result = { name: name }; | ||
result.getName = () => { return name; }; | ||
result.pop = pop || (() => assert.fail('should not pop')); | ||
@@ -1361,0 +1367,0 @@ result.push = push || (() => assert.fail('should not push')); |
@@ -48,3 +48,3 @@ const assert = require('chai').assert; | ||
it('should queue collection pages as deepShallow and elements as deepShallow', () => { | ||
const request = new Request('issues', 'http://test.com/issues'); | ||
const request = new Request('issues', 'http://test.com/issues', { elementType: 'issue' }); | ||
request.policy.transitivity = 'deepShallow'; | ||
@@ -78,3 +78,3 @@ request.response = { | ||
it('should queue deepShallow root collections as deepShallow and elements as shallow', () => { | ||
const request = new Request('orgs', 'http://test.com/orgs'); | ||
const request = new Request('orgs', 'http://test.com/orgs', { elementType: 'org' }); | ||
request.policy.transitivity = 'deepShallow'; | ||
@@ -109,3 +109,3 @@ request.response = { | ||
it('should queue forceForce root collection pages as forceForce and elements as forceNormal', () => { | ||
const request = new Request('orgs', 'http://test.com/orgs'); | ||
const request = new Request('orgs', 'http://test.com/orgs', { elementType: 'org' }); | ||
request.policy = TraversalPolicy.update(); | ||
@@ -139,3 +139,3 @@ request.response = { | ||
it('should queue forceForce page elements with forceNormal transitivity', () => { | ||
const request = new Request('orgs', 'http://test.com/orgs?page=2&per_page=100'); | ||
const request = new Request('orgs', 'http://test.com/orgs?page=2&per_page=100', { elementType: 'org' }); | ||
request.policy = TraversalPolicy.update(); | ||
@@ -169,3 +169,3 @@ request.document = { _metadata: { links: {} }, elements: [{ url: 'http://child1' }] }; | ||
expect(teamsRequest.context.qualifier).to.be.equal('urn:repo:42'); | ||
expect(teamsRequest.context.relation).to.be.deep.equal({ origin: 'repo', name: 'teams', type: 'team' } ); | ||
expect(teamsRequest.context.relation).to.be.deep.equal({ origin: 'repo', name: 'teams', type: 'team' }); | ||
@@ -178,7 +178,9 @@ request.crawler.queue.reset(); | ||
const links = teamsPage._metadata.links; | ||
expect(links.teams.type).to.be.equal('self'); | ||
expect(links.teams.hrefs.length).to.be.equal(1); | ||
expect(links.teams.hrefs[0]).to.be.equal('urn:team:13'); | ||
expect(links.repo.type).to.be.equal('self'); | ||
expect(links.resources.type).to.be.equal('resource'); | ||
expect(links.resources.hrefs.length).to.be.equal(1); | ||
expect(links.resources.hrefs[0]).to.be.equal('urn:team:13'); | ||
expect(links.repo.type).to.be.equal('resource'); | ||
expect(links.repo.href).to.be.equal('urn:repo:42'); | ||
expect(links.origin.type).to.be.equal('resource'); | ||
expect(links.origin.href).to.be.equal('urn:repo:42'); | ||
@@ -196,7 +198,7 @@ const teamRequest = request.crawler.queue.getCall(0).args[0]; | ||
expect(membersRequest.context.qualifier).to.be.equal('urn:team:54'); | ||
expect(membersRequest.context.relation).to.be.equal('team_members_relation'); | ||
expect(membersRequest.context.relation).to.be.deep.equal({ name: 'members', origin: 'team', type: 'user' }); | ||
const reposRequest = request.crawler.queue.getCall(1).args[0]; | ||
expect(reposRequest.url).to.be.equal('http://team1/repos'); | ||
expect(reposRequest.context.qualifier).to.be.equal('urn:team:54'); | ||
expect(reposRequest.context.relation).to.be.equal('team_repos_relation'); | ||
expect(reposRequest.context.relation).to.be.deep.equal({ name: 'repos', origin: 'team', type: 'repo' }); | ||
}); | ||
@@ -203,0 +205,0 @@ }); |
@@ -11,3 +11,3 @@ const assert = require('chai').assert; | ||
it('should throw on duplicate queue names', () => { | ||
expect(() => new QueueSet([{ name: '1' }, { name: '1' }])).to.throw(Error); | ||
expect(() => new QueueSet([createBaseQueue('1'), createBaseQueue('1')])).to.throw(Error); | ||
}); | ||
@@ -18,3 +18,3 @@ }); | ||
it('should create a simple startMap', () => { | ||
const set = new QueueSet([{ name: '1' }, { name: '2' }], null, createOptions([3, 2])); | ||
const set = new QueueSet([createBaseQueue('1'), createBaseQueue('2')], null, createOptions([3, 2])); | ||
expect(set.startMap.length).to.be.equal(5); | ||
@@ -28,3 +28,3 @@ expect(set.startMap[0]).to.be.equal(0); | ||
it('should create a default startMap if no weights given', () => { | ||
const set = new QueueSet([{ name: '1' }, { name: '2' }]); | ||
const set = new QueueSet([createBaseQueue('1'), createBaseQueue('2')]); | ||
expect(set.startMap.length).to.be.equal(1); | ||
@@ -35,11 +35,11 @@ expect(set.startMap[0]).to.be.equal(0); | ||
it('should throw if too many weights are given', () => { | ||
expect(() => new QueueSet([{ name: '1' }, { name: '2' }], null, createOptions([3, 2, 1]))).to.throw(Error); | ||
expect(() => new QueueSet([createBaseQueue('1'), createBaseQueue('2')], null, createOptions([3, 2, 1]))).to.throw(Error); | ||
}); | ||
it('should throw if no weights are given', () => { | ||
expect(() => new QueueSet([{ name: '1' }, { name: '2' }], null, [])).to.throw(Error); | ||
expect(() => new QueueSet([createBaseQueue('1'), createBaseQueue('2')], null, [])).to.throw(Error); | ||
}); | ||
it('should create a simple startMap', () => { | ||
const set = new QueueSet([{ name: '1' }, { name: '2' }], null, createOptions([3, 2])); | ||
const set = new QueueSet([createBaseQueue('1'), createBaseQueue('2')], null, createOptions([3, 2])); | ||
expect(set.startMap.length).to.be.equal(5); | ||
@@ -257,2 +257,3 @@ expect(set.startMap[0]).to.be.equal(0); | ||
const result = { name: name }; | ||
result.getName = () => { return name; }; | ||
result.pop = pop || (() => assert.fail('should not pop')); | ||
@@ -259,0 +260,0 @@ result.push = push || (() => assert.fail('should not push')); |
161647
3585