Comparing version 0.2.13 to 0.2.14
@@ -0,0 +0,0 @@ { |
@@ -0,0 +0,0 @@ { |
@@ -0,0 +0,0 @@ // Place your settings in this file to overwrite default and user settings. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -8,2 +8,3 @@ // Copyright (c) Microsoft Corporation. All rights reserved. | ||
const URL = require('url'); | ||
const debug = require('debug')('crawler:crawler'); | ||
@@ -30,2 +31,3 @@ class Crawler { | ||
if (orgList) { | ||
debug('orgList changed'); | ||
this.options.orgList = orgList.value.map(element => element.toLowerCase()); | ||
@@ -88,6 +90,7 @@ } | ||
*/ | ||
processOne() { | ||
processOne(context) { | ||
let requestBox = []; | ||
requestBox.loopName = context.name; | ||
return Q() | ||
.then(this.trace(this._getRequest.bind(this, requestBox))) | ||
.then(this.trace(this._getRequest.bind(this, requestBox, context))) | ||
.then(this.trace(this._filter.bind(this))) | ||
@@ -125,4 +128,7 @@ .then(this.trace(this._fetch.bind(this))) | ||
_getRequest(requestBox) { | ||
return this._logStartEnd('getRequest', null, () => { return this._getRequestWork(requestBox); }); | ||
_getRequest(requestBox, context) { | ||
return this._logStartEnd('getRequest', null, () => { | ||
const request = this._getRequestWork(requestBox); | ||
return request; | ||
}); | ||
} | ||
@@ -132,2 +138,3 @@ | ||
const self = this; | ||
debug(`getRequestWork(${requestBox.loopName}): enter`); | ||
return this.trace(this.queues.pop(), 'pop') | ||
@@ -140,6 +147,8 @@ .then(request => { | ||
request.markSkip('Drained ', `Waiting ${delay} milliseconds`); | ||
debug(`getRequestWork(${requestBox.loopName}): drained waiting ${delay} milliseconds`); | ||
} | ||
this.counter = ++this.counter % this.counterRollover; | ||
request.addMeta({ cid: this.counter.toString(36) }); | ||
request.addMeta({ loopName: requestBox.loopName, cid: this.counter.toString(36) }); | ||
requestBox[0] = request.open(self); | ||
debug(`getRequestWork(${requestBox.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return requestBox[0]; | ||
@@ -152,3 +161,5 @@ }) | ||
_acquireLock(request) { | ||
debug(`_acquireLock(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (!request.url || !this.locker || request.requiresLock === false) { | ||
debug(`_acquireLock(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return Q(request); | ||
@@ -161,2 +172,3 @@ } | ||
lock => { | ||
debug(`_acquireLock(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
request.lock = lock; | ||
@@ -168,2 +180,3 @@ return request; | ||
// noting that we could not get a lock. For any other error, requeue and capture the error for debugging. | ||
debug(`_acquireLock(${request.meta.loopName}:${request.toUniqueString()}): exit (error)`); | ||
if (error.message.startsWith('Exceeded')) { | ||
@@ -178,3 +191,5 @@ return request.markRequeue('Collision', 'Could not lock'); | ||
_releaseLock(request) { | ||
debug(`_releaseLock(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (!request.lock || !this.locker) { | ||
debug(`_releaseLock(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return Q(request); | ||
@@ -187,2 +202,3 @@ } | ||
() => { | ||
debug(`_releaseLock(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
request.lock = null; | ||
@@ -192,2 +208,3 @@ return request; | ||
error => { | ||
debug(`_releaseLock(${request.meta.loopName}:${request.toUniqueString()}): exit (error)`); | ||
request.lock = null; | ||
@@ -220,10 +237,25 @@ self.logger.error(error); | ||
debug(`_completeRequest(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
const self = this; | ||
if (forceRequeue || (request.shouldRequeue() && request.url)) { | ||
return Q | ||
.try(() => { return self._requeue(request); }) | ||
.catch(error => { self.logger.error(error); throw error; }) | ||
.finally(() => { return self._releaseLock(request); }) | ||
.then(() => { return self._deleteFromQueue(request); }, error => { return self._abandonInQueue(request); }) | ||
.then(() => request); | ||
.try(() => { | ||
return self._requeue(request); | ||
}) | ||
.catch(error => { | ||
self.logger.error(error); | ||
throw error; | ||
}) | ||
.finally(() => { | ||
return self._releaseLock(request); | ||
}) | ||
.then(() => { | ||
return self._deleteFromQueue(request); | ||
}, error => { | ||
return self._abandonInQueue(request); | ||
}) | ||
.then(() => { | ||
debug(`_completeRequest(${request.meta.loopName}:${request.toUniqueString()}): exit (success - force requeue)`); | ||
return request; | ||
}); | ||
} | ||
@@ -233,6 +265,8 @@ const completeWork = Q.all(request.getTrackedPromises()).then( | ||
return self._releaseLock(request).then( | ||
() => self._deleteFromQueue(request), | ||
() => { | ||
return self._deleteFromQueue(request); | ||
}, | ||
error => { | ||
self.logger.error(error); | ||
self._abandonInQueue(request); | ||
return self._abandonInQueue(request); | ||
}); | ||
@@ -244,6 +278,10 @@ }, | ||
}); | ||
return completeWork.then(() => request); | ||
return completeWork.then(() => { | ||
debug(`_completeRequest(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return request; | ||
}); | ||
} | ||
_requeue(request) { | ||
debug(`_requeue(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
return Q.try(() => { | ||
@@ -257,2 +295,5 @@ request.attemptCount = request.attemptCount || 0; | ||
return this.queues.repush(request, queuable); | ||
}).then(result => { | ||
debug(`_requeue(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return result; | ||
}); | ||
@@ -262,3 +303,5 @@ } | ||
_filter(request) { | ||
debug(`_filter(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (request.shouldSkip()) { | ||
debug(`_filter(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return request; | ||
@@ -268,7 +311,10 @@ } | ||
// park the malformed request in the dead queue for debugging and ignore the returned promise | ||
debug(`_filter(${request.meta.loopName}:${request.toUniqueString()}): exit (malformed)`); | ||
return this._queueDeadAndLog(request, `Detected malformed request ${request.toString()}`); | ||
} | ||
if (this._shouldFilter(request)) { | ||
debug(`_filter(${request.meta.loopName}:${request.toUniqueString()}): exit (success - filtered)`); | ||
request.markSkip('Declined'); | ||
} | ||
debug(`_filter(${request.meta.loopName}:${request.toUniqueString()}): exit (success - not filtered)`); | ||
return request; | ||
@@ -278,3 +324,5 @@ } | ||
_fetch(request) { | ||
debug(`_fetch(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (request.shouldSkip()) { | ||
debug(`_fetch(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return request; | ||
@@ -287,2 +335,3 @@ } | ||
request.response = { headers: { etag: request.payload.etag } }; | ||
debug(`_fetch(${request.meta.loopName}:${request.toUniqueString()}): exit (success - payload)`); | ||
return request; | ||
@@ -292,2 +341,5 @@ } | ||
return this.fetcher.fetch(request); | ||
}).then(request => { | ||
debug(`_fetch(${request.meta.loopName}:${request.toUniqueString()}): exit (success - fetched)`); | ||
return request; | ||
}); | ||
@@ -297,3 +349,5 @@ } | ||
_convertToDocument(request) { | ||
debug(`_convertToDocument(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (request.shouldSkip()) { | ||
debug(`_convertToDocument(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return Q(request); | ||
@@ -330,2 +384,3 @@ } | ||
request.document._metadata = metadata; | ||
debug(`_convertToDocument(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return Q(request); | ||
@@ -335,3 +390,5 @@ } | ||
_processDocument(request) { | ||
debug(`_processDocument(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (request.shouldSkip()) { | ||
debug(`_processDocument(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return Q(request); | ||
@@ -341,2 +398,3 @@ } | ||
request.document = this.processor.process(request); | ||
debug(`_processDocument(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return request; | ||
@@ -372,3 +430,5 @@ }); | ||
_storeDocument(request) { | ||
debug(`_storeDocument(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
if (request.shouldSkip() || !request.shouldSave()) { | ||
debug(`_storeDocument(${request.meta.loopName}:${request.toUniqueString()}): exit (nothing to do)`); | ||
return Q(request); | ||
@@ -381,2 +441,3 @@ } | ||
request.addMeta({ write: Date.now() - start }); | ||
debug(`_storeDocument(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return request; | ||
@@ -387,4 +448,8 @@ }); | ||
_deleteFromQueue(request) { | ||
debug(`_deleteFromQueue(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
return Q.try(() => { | ||
return this.queues.done(request).then(() => { return request; }); | ||
return this.queues.done(request).then(() => { | ||
debug(`_deleteFromQueue(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return request; | ||
}); | ||
}); | ||
@@ -394,4 +459,8 @@ } | ||
_abandonInQueue(request) { | ||
debug(`_abandonInQueue(${request.meta.loopName}:${request.toUniqueString()}): enter`); | ||
return Q.try(() => { | ||
return this.queues.abandon(request).then(() => { return request; }); | ||
return this.queues.abandon(request).then(() => { | ||
debug(`_abandonInQueue(${request.meta.loopName}:${request.toUniqueString()}): exit (success)`); | ||
return request; | ||
}); | ||
}); | ||
@@ -398,0 +467,0 @@ } |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
{ | ||
"name": "ghcrawler", | ||
"version": "0.2.13", | ||
"version": "0.2.14", | ||
"description": "A robust GitHub API crawler that walks a queue of GitHub entities retrieving and storing their contents.", | ||
@@ -5,0 +5,0 @@ "main": "./index.js", |
@@ -0,0 +0,0 @@ ![Version](https://img.shields.io/npm/v/ghcrawler.svg) |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
@@ -0,0 +0,0 @@ // Copyright (c) Microsoft Corporation. All rights reserved. |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
5276
229336