Comparing version 0.2.18 to 0.2.19
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
const debug = require('debug')('crawler:crawler'); | ||
const moment = require('moment'); | ||
@@ -8,3 +9,4 @@ const Q = require('q'); | ||
const URL = require('url'); | ||
const debug = require('debug')('crawler:crawler'); | ||
const uuid = require('node-uuid'); | ||
debug.log = console.info.bind(console); | ||
@@ -14,5 +16,6 @@ | ||
constructor(queues, store, locker, fetcher, processor, options) { | ||
constructor(queues, store, deadletters, locker, fetcher, processor, options) { | ||
this.queues = queues; | ||
this.store = store; | ||
this.deadletters = deadletters; | ||
this.locker = locker; | ||
@@ -303,3 +306,3 @@ this.fetcher = fetcher; | ||
if (++request.attemptCount > 5) { | ||
return this._queueDeadAndLog(request, `Exceeded attempt count for ${request.type}@${request.url}`); | ||
return this._storeDeadletter(request, `Exceeded attempt count for ${request.type}@${request.url}`); | ||
} | ||
@@ -324,3 +327,3 @@ request.addMeta({ attempt: request.attemptCount }); | ||
debug(`_filter(${request.meta.loopName}:${request.toUniqueString()}): exit (malformed)`); | ||
return this._queueDeadAndLog(request, `Detected malformed request ${request.toString()}`); | ||
return this._storeDeadletter(request, `Detected malformed request ${request.toString()}`); | ||
} | ||
@@ -484,16 +487,33 @@ if (this._shouldFilter(request)) { | ||
_queueDeadAndLog(request, reason) { | ||
const error = new Error(reason); | ||
error._type = request.type; | ||
error._url = request.url; | ||
error._cid = request.cid; | ||
this.logger.error(error, request.meta); | ||
return request.markDead('Archived', reason); | ||
_storeDeadletter(request, reason, error = null) { | ||
request._error = error; | ||
return request.markDead('Deadletter', reason); | ||
} | ||
queueDead(request) { | ||
const queuable = request.createRequeuable(); | ||
return this.queues.pushDead(queuable); | ||
storeDeadletter(request, reason = null) { | ||
debug(`_storeDeadletter(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
const document = this._createDeadletter(request, reason); | ||
return this.deadletters.upsert(document).then(() => { | ||
debug(`_storeDeadletter(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return request; | ||
}); | ||
} | ||
_createDeadletter(request, reason) { | ||
const deadDocument = request.createRequeuable(); | ||
const metadata = deadDocument._metadata = {}; | ||
if (request._error) { | ||
metadata.errorMessage = request._error.message; | ||
metadata.errorStack = request._error.stack; | ||
} | ||
metadata.version = 1; | ||
metadata.meta = request.meta; | ||
metadata.type = 'deadletter'; | ||
metadata.url = request.url.replace('//', '//deadletter.'); | ||
metadata.fetchedAt = metadata.processedAt = moment.utc().toISOString(); | ||
metadata.links = { self: { href: `urn:deadletter:${uuid.v4()}`, type: 'resource' } }; | ||
metadata.extra = { type: request.type, url: request.url, reason: reason }; | ||
return deadDocument; | ||
} | ||
queue(requests, name = null) { | ||
@@ -507,3 +527,3 @@ return this.queues.push(this._preFilter(requests), name || 'normal'); | ||
if (!request.url || !request.type) { | ||
this._queueDeadAndLog(request, `Attempt to queue malformed request ${request.toString()}`); | ||
this._storeDeadletter(request, `Attempt to queue malformed request ${request.toString()}`); | ||
return false; | ||
@@ -510,0 +530,0 @@ } |
@@ -95,5 +95,17 @@ // Copyright (c) Microsoft Corporation. All rights reserved. | ||
} | ||
return queue.popMany(count, remove); | ||
const result = []; | ||
for (let i = 0; i < count; i++) { | ||
result.push(this.pop()); | ||
} | ||
return Q.all(result).then(requests => { | ||
const filtered = requests.filter(request => request); | ||
return Q.all(filtered.map(request => remove ? this.done(request) : this.abandon(request))).thenResolve(filtered); | ||
}); | ||
} | ||
listDeadletters() { | ||
return this.crawler.deadletters.listDocuments(); | ||
} | ||
_reconfigure(current, changes) { | ||
@@ -100,0 +112,0 @@ // if the loop count changed, make it so |
@@ -67,3 +67,3 @@ // Copyright (c) Microsoft Corporation. All rights reserved. | ||
if (request.attemptCount && request.attemptCount > 1) { | ||
return request.markDead('Bailed', `After ${request.attemptCount} tries.`); | ||
return request.markDead('Bailed', `After ${request.attemptCount} tries. Status ${response.statusCode}`); | ||
} | ||
@@ -70,0 +70,0 @@ return request.markRequeue('Missing', 'Requeuing...'); |
@@ -7,3 +7,3 @@ // Copyright (c) Microsoft Corporation. All rights reserved. | ||
class QueueSet { | ||
constructor(queues, deadletter, options) { | ||
constructor(queues, options) { | ||
this.queues = queues; | ||
@@ -17,3 +17,2 @@ this.queueTable = queues.reduce((table, queue) => { | ||
} | ||
this.deadletter = deadletter; | ||
this.options = options; | ||
@@ -36,6 +35,2 @@ this.options._config.on('changed', this._reconfigure.bind(this)); | ||
pushDead(requests) { | ||
return this.deadletter.push(requests); | ||
} | ||
repush(original, newRequest) { | ||
@@ -47,7 +42,7 @@ const queue = original._retryQueue ? this.getQueue(original._retryQueue) : original._originQueue; | ||
subscribe() { | ||
return Q.all(this.queues.concat([this.deadletter]).map(queue => { return queue.subscribe(); })); | ||
return Q.all(this.queues.map(queue => { return queue.subscribe(); })); | ||
} | ||
unsubscribe() { | ||
return Q.all(this.queues.concat([this.deadletter]).map(queue => { return queue.unsubscribe(); })); | ||
return Q.all(this.queues.map(queue => { return queue.unsubscribe(); })); | ||
} | ||
@@ -93,3 +88,3 @@ | ||
getQueue(name) { | ||
const result = name === 'deadletter' ? this.deadletter : this.queueTable[name]; | ||
const result = this.queueTable[name]; | ||
if (!result) { | ||
@@ -96,0 +91,0 @@ throw new Error(`Queue not found: ${name}`); |
@@ -51,3 +51,3 @@ // Copyright (c) Microsoft Corporation. All rights reserved. | ||
if (!this.policy) { | ||
return this.crawler.queueDead(this); | ||
return this.markDead('Bogus', 'No policy'); | ||
} | ||
@@ -59,3 +59,3 @@ if (typeof this.policy === 'string') { | ||
if (!policy) { | ||
return this.crawler.queueDead(this); | ||
return this.markDead('Bogus', 'Unable to resolve policy'); | ||
} | ||
@@ -168,3 +168,3 @@ this.policy = policy; | ||
markDead(outcome, message) { | ||
this.crawler.queueDead(this); | ||
this.track(this.crawler.storeDeadletter(this, message)); | ||
return this.markSkip(outcome, message); | ||
@@ -171,0 +171,0 @@ } |
{ | ||
"name": "ghcrawler", | ||
"version": "0.2.18", | ||
"version": "0.2.19", | ||
"description": "A robust GitHub API crawler that walks a queue of GitHub entities retrieving and storing their contents.", | ||
@@ -5,0 +5,0 @@ "main": "./index.js", |
237697
5312