pelias-address-deduplicator
Advanced tools
Comparing version 1.0.4 to 1.0.5
@@ -13,10 +13,14 @@ /** | ||
function getTimeMs(){ | ||
return new Date().getTime(); | ||
} | ||
/** | ||
* Return an address deduplication filter. | ||
* | ||
* @param {int} [requestBatchSize=100] The number of addresses to buffer into a | ||
* @param {int} [requestBatchSize=10000] The number of addresses to buffer into a | ||
* batch before sending it to the deduplicator. The higher the number, the | ||
* less time and energy collectively spent in making requests, but the | ||
* bigger the memory consumption buildup. | ||
* @param {int} [maxLiveRequests=10] Since the deduper is implemented as a | ||
* @param {int} [maxLiveRequests=4] Since the deduper is implemented as a | ||
* standalone server and processes data more slowly than the importer feeds | ||
@@ -38,3 +42,3 @@ * it, the stream needs to rate-limit itself. `maxLiveRequests` indicates | ||
var addresses = []; | ||
requestBatchSize = requestBatchSize || 100; | ||
requestBatchSize = requestBatchSize || 10000; | ||
@@ -48,3 +52,3 @@ // Used to close this stream after the input stream dries up and the last | ||
var streamPaused = false; | ||
maxLiveRequests = maxLiveRequests || 10; | ||
maxLiveRequests = maxLiveRequests || 4; | ||
@@ -60,7 +64,9 @@ // Number of duplicate addresses detected. | ||
duplicates: 0, | ||
uniques: 0 | ||
uniques: 0, | ||
timeSpentPaused: 0 | ||
}; | ||
var pauseTime; | ||
var intervalId = setInterval( function ( ){ | ||
stats.uniques = stats.total - stats.duplicates; | ||
logger.verbose( stats ); | ||
@@ -96,2 +102,3 @@ }, 1e4); | ||
else { | ||
stats.uniques++; | ||
batch[ ind ].setId( addressResp.guid ); | ||
@@ -110,2 +117,3 @@ downstream.push( batch[ ind ] ); | ||
if( liveRequests < maxLiveRequests && streamPaused ){ | ||
stats.timeSpentPaused += getTimeMs() - pauseTime; | ||
streamPaused = false; | ||
@@ -119,2 +127,3 @@ downstream.emit( 'resumeStream' ); | ||
if( liveRequests >= maxLiveRequests ){ | ||
pauseTime = getTimeMs(); | ||
streamPaused = true; | ||
@@ -151,4 +160,5 @@ } | ||
*/ | ||
function signalStreamEnd( ){ | ||
function signalStreamEnd(){ | ||
streamEnded = true; | ||
sendBatch( addresses, this ); | ||
} | ||
@@ -155,0 +165,0 @@ |
{ | ||
"name": "pelias-address-deduplicator", | ||
"version": "1.0.4", | ||
"version": "1.0.5", | ||
"description": "A stream for deduplicating a stream of address objects.", | ||
@@ -5,0 +5,0 @@ "main": "address_deduplicator_stream.js", |
@@ -10,7 +10,7 @@ # address deduplicator | ||
* `requestBatchSize` (default: `100`): The number of addresses to buffer into a | ||
* `requestBatchSize` (default: `10000`): The number of addresses to buffer into a | ||
batch before sending it to the deduplicator. The higher the number, the | ||
less time and energy collectively spent in making requests, but the | ||
bigger the memory consumption buildup. | ||
* `maxLiveRequests` (default: `10`): Since the deduper is implemented as a | ||
* `maxLiveRequests` (default: `4`): Since the deduper is implemented as a | ||
standalone server and processes data more slowly than the importer feeds | ||
@@ -17,0 +17,0 @@ it, the stream needs to rate-limit itself. `maxLiveRequests` indicates |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
160
8347
6