fast-stats
Advanced tools
Comparing version 0.0.1 to 0.0.2
545
faststats.js
@@ -8,9 +8,56 @@ /* | ||
function Stats() { | ||
var a = arguments; | ||
if(a[0] instanceof Array) | ||
a=a[0]; | ||
// Table of recognised configuration keys. Each entry validates the supplied | ||
// value and records it on the target object's _config; `o` is the object being | ||
// configured and the second argument is the user-supplied value. | ||
var config_params = { | ||
// Must be a positive number. Also initialises o.buckets to an empty array. | ||
bucket_precision: function(o, s) { | ||
if(typeof s != "number" || s <= 0) { | ||
throw new Error("bucket_precision must be a positive number"); | ||
} | ||
o._config.bucket_precision = s; | ||
o.buckets = []; | ||
}, | ||
// Must be a non-empty array. The array is stored by reference (not copied) | ||
// and o.buckets is initialised to an empty array. | ||
buckets: function(o, b) { | ||
if(!Array.isArray(b) || b.length == 0) { | ||
throw new Error("buckets must be an array of bucket limits"); | ||
} | ||
o._config.buckets = b; | ||
o.buckets = []; | ||
}, | ||
// Optional: an explicit undefined is silently ignored. Any other value | ||
// must be a positive number. | ||
bucket_extension_interval: function(o, s) { | ||
if(s === undefined) | ||
return; | ||
if(typeof s != "number" || s<=0) { | ||
throw new Error("bucket_extension_interval must be a positive number"); | ||
} | ||
o._config.bucket_extension_interval = s; | ||
}, | ||
// Must be a strict boolean (truthy/falsy values are rejected). | ||
store_data: function(o, s) { | ||
if(typeof s != "boolean") { | ||
throw new Error("store_data must be a true or false"); | ||
} | ||
o._config.store_data = s; | ||
}, | ||
// Must be a strict boolean (truthy/falsy values are rejected). | ||
sampling: function(o, s) { | ||
if(typeof s != "boolean") { | ||
throw new Error("sampling must be a true or false"); | ||
} | ||
o._config.sampling = s; | ||
} | ||
}; | ||
function Stats(c) { | ||
this._config = { store_data: true }; | ||
if(c) { | ||
for(var k in config_params) { | ||
if(c.hasOwnProperty(k)) { | ||
config_params[k](this, c[k]); | ||
} | ||
} | ||
} | ||
this.reset(); | ||
if(a) | ||
this.push.apply(this, a); | ||
@@ -21,4 +68,7 @@ return this; | ||
Stats.prototype = { | ||
reset: function() { | ||
this.data = []; | ||
if(this._config.store_data) | ||
this.data = []; | ||
this.length = 0; | ||
@@ -30,2 +80,3 @@ | ||
this.sum_of_square_of_logs = 0; | ||
this.zeroes = 0; | ||
this.max = this.min = null; | ||
@@ -39,22 +90,68 @@ | ||
_reset_cache: function() { | ||
this._amean = null; | ||
this._gmean = null; | ||
this._stddev = null; | ||
this._gstddev = null; | ||
this._moe = null; | ||
this._data_sorted = null; | ||
if(this._config.store_data) | ||
this._data_sorted = null; | ||
}, | ||
// Returns the index of the bucket that value `a` belongs to. | ||
// - With custom limits (_config.buckets): the first index whose limit is | ||
//   greater than `a`, or the number of limits if `a` is not below any of them. | ||
// - With _config.bucket_precision: Math.floor(a / bucket_precision). | ||
// - With neither configured: 0. | ||
_find_bucket: function(a) { | ||
var b=0, e, l; | ||
if(this._config.buckets) { | ||
l = this._config.buckets.length; | ||
// Value lies at or beyond the last limit and extension is enabled: compute | ||
// an index past the current limits and write the missing limits at b and | ||
// b-1 into _config.buckets (this mutates the configured array). | ||
if(this._config.bucket_extension_interval && a >= this._config.buckets[l-1]) { | ||
e=a-this._config.buckets[l-1]; | ||
b = parseInt(e/this._config.bucket_extension_interval) + l; | ||
if(this._config.buckets[b] === undefined) | ||
this._config.buckets[b] = this._config.buckets[l-1] + (parseInt(e/this._config.bucket_extension_interval)+1)*this._config.bucket_extension_interval; | ||
if(this._config.buckets[b-1] === undefined) | ||
this._config.buckets[b-1] = this._config.buckets[l-1] + parseInt(e/this._config.bucket_extension_interval)*this._config.bucket_extension_interval; | ||
} | ||
// After an extension b is already >= l, so this scan is skipped. | ||
for(; b<l; b++) { | ||
if(a < this._config.buckets[b]) { | ||
break; | ||
} | ||
} | ||
} | ||
else if(this._config.bucket_precision) { | ||
b = Math.floor(a/this._config.bucket_precision); | ||
} | ||
return b; | ||
}, | ||
_add_cache: function(a) { | ||
this.sum += a; | ||
this.sum_of_squares += a*a; | ||
this.sum_of_logs += Math.log(a); | ||
this.sum_of_square_of_logs += Math.pow(Math.log(a), 2); | ||
this.length++; | ||
var tuple=[1], i; | ||
if(a instanceof Array) { | ||
tuple = a; | ||
a = tuple.shift(); | ||
} | ||
if(this.max === null || this.max < a) | ||
this.max = a; | ||
if(this.min === null || this.min > a) | ||
this.min = a; | ||
this.sum += a*tuple[0]; | ||
this.sum_of_squares += a*a*tuple[0]; | ||
if(a === 0) { | ||
this.zeroes++; | ||
} | ||
else { | ||
this.sum_of_logs += Math.log(a)*tuple[0]; | ||
this.sum_of_square_of_logs += Math.pow(Math.log(a), 2)*tuple[0]; | ||
} | ||
this.length += tuple[0]; | ||
if(tuple[0] > 0) { | ||
if(this.max === null || this.max < a) | ||
this.max = a; | ||
if(this.min === null || this.min > a) | ||
this.min = a; | ||
} | ||
if(this.buckets) { | ||
var b = this._find_bucket(a); | ||
if(!this.buckets[b]) | ||
this.buckets[b] = [0]; | ||
this.buckets[b][0] += tuple.shift(); | ||
for(i=0; i<tuple.length; i++) | ||
this.buckets[b][i+1] = (this.buckets[b][i+1]|0) + (tuple[i]|0); | ||
} | ||
this._reset_cache(); | ||
@@ -64,22 +161,53 @@ }, | ||
_del_cache: function(a) { | ||
this.sum -= a; | ||
this.sum_of_squares -= a*a; | ||
this.sum_of_logs -= Math.log(a); | ||
this.sum_of_square_of_logs -= Math.pow(Math.log(a), 2); | ||
this.length--; | ||
var tuple=[1], i; | ||
if(a instanceof Array) { | ||
tuple = a; | ||
a = tuple.shift(); | ||
} | ||
if(this.length === 0) { | ||
this.max = this.min = null; | ||
this.sum -= a*tuple[0]; | ||
this.sum_of_squares -= a*a*tuple[0]; | ||
if(a === 0) { | ||
this.zeroes--; | ||
} | ||
else if(this.max === a || this.min === a) { | ||
var i = this.length-1; | ||
this.max = this.min = this.data[i--]; | ||
while(i--) { | ||
if(this.max < this.data[i]) | ||
this.max = this.data[i]; | ||
if(this.min > this.data[i]) | ||
this.min = this.data[i]; | ||
else { | ||
this.sum_of_logs -= Math.log(a)*tuple[0]; | ||
this.sum_of_square_of_logs -= Math.pow(Math.log(a), 2)*tuple[0]; | ||
} | ||
this.length -= tuple[0]; | ||
if(this._config.store_data) { | ||
if(this.length === 0) { | ||
this.max = this.min = null; | ||
} | ||
if(this.length === 1) { | ||
this.max = this.min = this.data[0]; | ||
} | ||
else if(tuple[0] > 0 && (this.max === a || this.min === a)) { | ||
var i = this.length-1; | ||
if(i>=0) { | ||
this.max = this.min = this.data[i--]; | ||
while(i-- >= 0) { | ||
if(this.max < this.data[i]) | ||
this.max = this.data[i]; | ||
if(this.min > this.data[i]) | ||
this.min = this.data[i]; | ||
} | ||
} | ||
} | ||
} | ||
if(this.buckets) { | ||
var b=this._find_bucket(a); | ||
if(this.buckets[b]) { | ||
this.buckets[b][0] -= tuple.shift(); | ||
if(this.buckets[b][0] === 0) | ||
delete this.buckets[b]; | ||
else | ||
for(i=0; i<tuple.length; i++) | ||
this.buckets[b][i+1] = (this.buckets[b][i+1]|0) - (tuple[i]|0); | ||
} | ||
} | ||
this._reset_cache(); | ||
@@ -89,14 +217,24 @@ }, | ||
push: function() { | ||
var i, a; | ||
for(i=0; i<arguments.length; i++) { | ||
a = arguments[i]; | ||
this.data.push(a); | ||
var i, a, args=Array.prototype.slice.call(arguments, 0); | ||
if(args.length && args[0] instanceof Array) | ||
args = args[0]; | ||
for(i=0; i<args.length; i++) { | ||
a = args[i]; | ||
if(this._config.store_data) | ||
this.data.push(a); | ||
this._add_cache(a); | ||
} | ||
return this.length; | ||
return this; | ||
}, | ||
// Adds a tuple by handing it to _add_cache. Throws if this.buckets is not set | ||
// (i.e. neither bucket_precision nor buckets was configured). | ||
// Has no return statement, so unlike push() it cannot be chained. | ||
// NOTE(review): _add_cache appears to shift elements off the array it is | ||
// given, so the caller's array would be modified - confirm. | ||
push_tuple: function(tuple) { | ||
if(!this.buckets) { | ||
throw new Error("push_tuple is only valid when using buckets"); | ||
} | ||
this._add_cache(tuple); | ||
}, | ||
pop: function() { | ||
if(this.length === 0) | ||
if(this.length === 0 || this._config.store_data === false) | ||
return undefined; | ||
@@ -110,15 +248,44 @@ | ||
// Removes a tuple by handing it to _del_cache. Throws if this.buckets is not | ||
// set (i.e. neither bucket_precision nor buckets was configured). | ||
// Has no return statement. | ||
// NOTE(review): _del_cache appears to shift elements off the array it is | ||
// given, so the caller's array would be modified - confirm. | ||
remove_tuple: function(tuple) { | ||
if(!this.buckets) { | ||
throw new Error("remove_tuple is only valid when using buckets"); | ||
} | ||
this._del_cache(tuple); | ||
}, | ||
// Overwrites slots in every populated bucket with the values from `tuple`. | ||
// For each bucket that exists and has more than one element, every index t | ||
// where tuple[t] is not undefined is assigned tuple[t]; undefined entries in | ||
// `tuple` leave the corresponding slot untouched. | ||
// Throws if this.buckets is not set. | ||
// NOTE(review): index 0 of a bucket looks like its item count, so a defined | ||
// tuple[0] would overwrite that count - confirm this is intended. | ||
// NOTE(review): the error text says "reset_tuple" while the method is named | ||
// reset_tuples. | ||
reset_tuples: function(tuple) { | ||
var b, l, t, ts=tuple.length; | ||
if(!this.buckets) { | ||
throw new Error("reset_tuple is only valid when using buckets"); | ||
} | ||
for(b=0, l=this.buckets.length; b<l; b++) { | ||
// Skip empty slots and buckets that hold nothing beyond their first element. | ||
if(!this.buckets[b] || this.buckets[b].length <= 1) { | ||
continue; | ||
} | ||
for(t=0; t<ts; t++) { | ||
if(typeof tuple[t] !== 'undefined') { | ||
this.buckets[b][t] = tuple[t]; | ||
} | ||
} | ||
} | ||
}, | ||
unshift: function() { | ||
var i=arguments.length, a; | ||
var i, a, args=Array.prototype.slice.call(arguments, 0); | ||
if(args.length && args[0] instanceof Array) | ||
args = args[0]; | ||
i=args.length; | ||
while(i--) { | ||
a = arguments[i]; | ||
this.data.unshift(a); | ||
a = args[i]; | ||
if(this._config.store_data) | ||
this.data.unshift(a); | ||
this._add_cache(a); | ||
} | ||
return this.length; | ||
return this; | ||
}, | ||
shift: function() { | ||
if(this.length === 0) | ||
if(this.length === 0 || this._config.store_data === false) | ||
return undefined; | ||
@@ -135,6 +302,3 @@ | ||
return NaN; | ||
if(this._amean === null) | ||
this._amean = this.sum/this.length; | ||
return this._amean; | ||
return this.sum/this.length; | ||
}, | ||
@@ -145,6 +309,5 @@ | ||
return NaN; | ||
if(this._gmean === null) | ||
this._gmean = Math.exp(this.sum_of_logs/this.length); | ||
return this._gmean; | ||
if(this.zeroes > 0) | ||
return NaN; | ||
return Math.exp(this.sum_of_logs/this.length); | ||
}, | ||
@@ -155,4 +318,7 @@ | ||
return NaN; | ||
var n=this.length; | ||
if(this._config.sampling) | ||
n--; | ||
if(this._stddev === null) | ||
this._stddev = Math.sqrt(this.length * this.sum_of_squares - this.sum*this.sum)/this.length; | ||
this._stddev = Math.sqrt((this.length * this.sum_of_squares - this.sum*this.sum)/(this.length*n)); | ||
@@ -165,6 +331,8 @@ return this._stddev; | ||
return NaN; | ||
if(this._gstddev === null) | ||
this._gstddev = Math.exp(Math.sqrt(this.length * this.sum_of_square_of_logs - this.sum_of_logs*this.sum_of_logs)/this.length); | ||
return this._gstddev; | ||
if(this.zeroes > 0) | ||
return NaN; | ||
var n=this.length; | ||
if(this._config.sampling) | ||
n--; | ||
return Math.exp(Math.sqrt((this.length * this.sum_of_square_of_logs - this.sum_of_logs*this.sum_of_logs)/(this.length*n))); | ||
}, | ||
@@ -176,6 +344,3 @@ | ||
// see http://en.wikipedia.org/wiki/Standard_error_%28statistics%29 | ||
if(this._moe === null) | ||
this._moe = 1.96*this.stddev()/Math.sqrt(this.length); | ||
return this._moe; | ||
return 1.96*this.stddev()/Math.sqrt(this.length); | ||
}, | ||
@@ -189,29 +354,132 @@ | ||
// Builds a histogram description of the data from the bucket counts. | ||
// Returns [] when the object is empty; throws if this.buckets is not set. | ||
// Each entry of the returned array has the shape | ||
//   { bucket: <midpoint>, range: [<low>, <high>], count: <n>, tuple: [...] } | ||
// where `tuple` is everything stored in the bucket after its first element. | ||
// The returned array may contain holes for buckets that were skipped. | ||
distribution: function() { | ||
if(this.length === 0) | ||
return []; | ||
if(!this.buckets) | ||
throw new Error("bucket_precision or buckets not configured."); | ||
var d=[], i, j, k, l; | ||
if(this._config.buckets) { | ||
// j tracks the lower bound of the current bucket: this.min for the first | ||
// iteration, then the previous bucket's limit. | ||
j=this.min; | ||
l=Math.min(this.buckets.length, this._config.buckets.length); | ||
for(i=0; i<l; j=this._config.buckets[i++]) { // this has to be i++ and not ++i | ||
// Fill in a missing limit from the previous one when extension is enabled. | ||
if(this._config.buckets[i] === undefined && this._config.bucket_extension_interval) | ||
this._config.buckets[i] = this._config.buckets[i-1] + this._config.bucket_extension_interval; | ||
// Skip buckets that lie entirely below the smallest value. | ||
if(this.min > this._config.buckets[i]) | ||
continue; | ||
d[i] = { | ||
bucket: (j+this._config.buckets[i])/2, | ||
range: [j, this._config.buckets[i]], | ||
count: (this.buckets[i]?this.buckets[i][0]:0), | ||
tuple: this.buckets[i]?this.buckets[i].slice(1):[] | ||
}; | ||
// Stop once the current limit is above the largest value. | ||
if(this.max < this._config.buckets[i]) | ||
break; | ||
} | ||
// The loop ran through every limit and there is a bucket past the last | ||
// one: report it with this.max as its upper bound. | ||
if(i == l && this.buckets[i]) { | ||
d[i] = { | ||
bucket: (j + this.max)/2, | ||
range: [j, this.max], | ||
count: this.buckets[i][0], | ||
tuple: this.buckets[i]?this.buckets[i].slice(1):[] | ||
}; | ||
} | ||
} | ||
else if(this._config.bucket_precision) { | ||
// i walks bucket indices from the one holding min to the one holding max; | ||
// j is the output position and advances even when a bucket is skipped. | ||
i=Math.floor(this.min/this._config.bucket_precision); | ||
l=Math.floor(this.max/this._config.bucket_precision)+1; | ||
for(j=0; i<l && i<this.buckets.length; i++, j++) { | ||
if(!this.buckets[i]) { | ||
continue; | ||
} | ||
d[j] = { | ||
bucket: (i+0.5)*this._config.bucket_precision, | ||
range: [i*this._config.bucket_precision, (i+1)*this._config.bucket_precision], | ||
count: this.buckets[i][0], | ||
tuple: this.buckets[i]?this.buckets[i].slice(1):[] | ||
}; | ||
} | ||
} | ||
return d; | ||
}, | ||
percentile: function(p) { | ||
if(this.length === 0) | ||
if(this.length === 0 || (!this._config.store_data && !this.buckets)) | ||
return NaN; | ||
if(this._data_sorted === null) | ||
this._data_sorted = this.data.sort(asc); | ||
// If we come here, we either have sorted data or sorted buckets | ||
var v; | ||
if(p <= 0) | ||
return this._data_sorted[0]; | ||
if(p == 25) | ||
return (this._data_sorted[Math.floor((this.length-1)*0.25)] + this._data_sorted[Math.ceil((this.length-1)*0.25)])/2; | ||
if(p == 50) | ||
return this.median(); | ||
if(p == 75) | ||
return (this._data_sorted[Math.floor((this.length-1)*0.75)] + this._data_sorted[Math.ceil((this.length-1)*0.75)])/2; | ||
if(p >= 100) | ||
return this._data_sorted[this.length-1]; | ||
v=0; | ||
else if(p == 25) | ||
v = [Math.floor((this.length-1)*0.25), Math.ceil((this.length-1)*0.25)]; | ||
else if(p == 50) | ||
v = [Math.floor((this.length-1)*0.5), Math.ceil((this.length-1)*0.5)]; | ||
else if(p == 75) | ||
v = [Math.floor((this.length-1)*0.75), Math.ceil((this.length-1)*0.75)]; | ||
else if(p >= 100) | ||
v = this.length-1; | ||
else | ||
v = Math.floor(this.length*p/100); | ||
return this._data_sorted[Math.floor(this.length*p/100)]; | ||
if(v === 0) | ||
return this.min; | ||
if(v === this.length-1) | ||
return this.max; | ||
if(this._config.store_data) { | ||
if(this._data_sorted === null) | ||
this._data_sorted = this.data.slice(0).sort(asc); | ||
if(typeof v == 'number') | ||
return this._data_sorted[v]; | ||
else | ||
return (this._data_sorted[v[0]] + this._data_sorted[v[1]])/2; | ||
} | ||
else { | ||
var j; | ||
if(typeof v != 'number') | ||
v = (v[0]+v[1])/2; | ||
if(this._config.buckets) | ||
j=0; | ||
else if(this._config.bucket_precision) | ||
j = Math.floor(this.min/this._config.bucket_precision); | ||
for(; j<this.buckets.length; j++) { | ||
if(!this.buckets[j]) | ||
continue; | ||
if(v<=this.buckets[j][0]) { | ||
break; | ||
} | ||
v-=this.buckets[j][0]; | ||
} | ||
return this._get_nth_in_bucket(v, j); | ||
} | ||
}, | ||
// Estimates the n-th value inside bucket b by linear interpolation across | ||
// the bucket's range: low + (high - low) * n / <first element of the bucket>. | ||
// With custom limits the range is [previous limit (or this.min for bucket 0), | ||
// this bucket's limit (or this.max past the last limit)]. | ||
// With bucket_precision the range is the bucket's span clamped to | ||
// [this.min, this.max]. | ||
// NOTE(review): this.buckets[b] is dereferenced without a check, so b must | ||
// refer to a populated bucket. | ||
_get_nth_in_bucket: function(n, b) { | ||
var range = []; | ||
if(this._config.buckets) { | ||
range[0] = (b>0?this._config.buckets[b-1]:this.min); | ||
range[1] = (b<this._config.buckets.length?this._config.buckets[b]:this.max); | ||
} | ||
else if(this._config.bucket_precision) { | ||
range[0] = Math.max(b*this._config.bucket_precision, this.min); | ||
range[1] = Math.min((b+1)*this._config.bucket_precision, this.max); | ||
} | ||
return range[0] + (range[1] - range[0])*n/this.buckets[b][0]; | ||
}, | ||
median: function() { | ||
if(this.length === 0) | ||
return NaN; | ||
if(this._data_sorted === null) | ||
this._data_sorted = this.data.sort(asc); | ||
return (this._data_sorted[Math.floor((this.length-1)/2)] + this._data_sorted[Math.ceil((this.length-1)/2)])/2; | ||
return this.percentile(50); | ||
}, | ||
@@ -230,22 +498,92 @@ | ||
band_pass: function(low, high, open) { | ||
var i, b=new Stats(); | ||
band_pass: function(low, high, open, config) { | ||
var i, j, b, b_val, i_val; | ||
if(!config) | ||
config = this._config; | ||
b = new Stats(config); | ||
if(this.length === 0) | ||
return new Stats(); | ||
return b; | ||
if(this._data_sorted === null) | ||
this._data_sorted = this.data.sort(asc); | ||
if(this._config.store_data) { | ||
if(this._data_sorted === null) | ||
this._data_sorted = this.data.slice(0).sort(asc); | ||
for(i=0; i<this.length && (this._data_sorted[i] < high || (!open && this._data_sorted[i] === high)); i++) { | ||
if(this._data_sorted[i] > low || (!open && this._data_sorted[i] === low)) { | ||
b.push(this._data_sorted[i]); | ||
} | ||
} | ||
} | ||
else if(this._config.buckets) { | ||
for(i=0; i<=this._config.buckets.length; i++) { | ||
if(this._config.buckets[i] < this.min) | ||
continue; | ||
for(i=0; i<this.length && (this._data_sorted[i] < high || (!open && this._data_sorted[i] === high)); i++) { | ||
if(this._data_sorted[i] > low || (!open && this._data_sorted[i] === low)) { | ||
b.push(this._data_sorted[i]); | ||
b_val = (i==0?this.min:this._config.buckets[i-1]); | ||
if(b_val < this.min) | ||
b_val = this.min; | ||
if(b_val > this.max) | ||
b_val = this.max; | ||
if(high < b_val || (open && high === b_val)) { | ||
break; | ||
} | ||
if(low < b_val || (!open && low === b_val)) { | ||
for(j=0; j<(this.buckets[i]?this.buckets[i][0]:0); j++) { | ||
i_val = Stats.prototype._get_nth_in_bucket.call(this, j, i); | ||
if( (i_val > low || (!open && i_val === low)) | ||
&& (i_val < high || (!open && i_val === high)) | ||
) { | ||
b.push(i_val); | ||
} | ||
} | ||
} | ||
} | ||
b.min = Math.max(low, b.min); | ||
b.max = Math.min(high, b.max); | ||
} | ||
else if(this._config.bucket_precision) { | ||
var low_i = Math.floor(low/this._config.bucket_precision), | ||
high_i = Math.floor(high/this._config.bucket_precision)+1; | ||
for(i=low_i; i<Math.min(this.buckets.length, high_i); i++) { | ||
for(j=0; j<(this.buckets[i]?this.buckets[i][0]:0); j++) { | ||
i_val = Stats.prototype._get_nth_in_bucket.call(this, j, i); | ||
if( (i_val > low || (!open && i_val === low)) | ||
&& (i_val < high || (!open && i_val === high)) | ||
) { | ||
b.push(i_val); | ||
} | ||
} | ||
} | ||
b.min = Math.max(low, b.min); | ||
b.max = Math.min(high, b.max); | ||
} | ||
return b; | ||
}, | ||
copy: function() { | ||
return new Stats(this.data); | ||
copy: function(config) { | ||
var b = Stats.prototype.band_pass.call(this, this.min, this.max, false, config); | ||
b.sum = this.sum; | ||
b.sum_of_squares = this.sum_of_squares; | ||
b.sum_of_logs = this.sum_of_logs; | ||
b.sum_of_square_of_logs = this.sum_of_square_of_logs; | ||
b.zeroes = this.zeroes; | ||
return b; | ||
}, | ||
// Returns the running total kept in this.sum. | ||
Σ: function() { | ||
return this.sum; | ||
}, | ||
// Returns 0 when this.zeroes is greater than 0, otherwise | ||
// Math.exp(this.sum_of_logs). | ||
Π: function() { | ||
return this.zeroes > 0 ? 0 : Math.exp(this.sum_of_logs); | ||
} | ||
@@ -261,3 +599,3 @@ }; | ||
if(process.argv[1] && process.argv[1].match(__filename)) { | ||
var s = new Stats(1, 2, 3); | ||
var s = new Stats({store_data:false, buckets: [ 1, 5, 10, 15, 20, 25, 30, 35 ]}).push(1, 2, 3); | ||
var l = process.argv.slice(2); | ||
@@ -268,3 +606,20 @@ if(!l.length) l = [10, 11, 15, 8, 13, 12, 19, 32, 17, 16]; | ||
console.log(s.data); | ||
console.log(s.amean().toFixed(2), s.μ().toFixed(2), s.stddev().toFixed(2), s.σ().toFixed(2), s.gmean().toFixed(2), s.median().toFixed(2), s.moe().toFixed(2)); | ||
console.log(s.amean().toFixed(2), s.μ().toFixed(2), s.stddev().toFixed(2), s.σ().toFixed(2), s.gmean().toFixed(2), s.median().toFixed(2), s.moe().toFixed(2), s.distribution()); | ||
var t=s.copy({buckets: [0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 25, 30, 35] }); | ||
console.log(t.amean().toFixed(2), t.μ().toFixed(2), t.stddev().toFixed(2), t.σ().toFixed(2), t.gmean().toFixed(2), t.median().toFixed(2), t.moe().toFixed(2), t.distribution()); | ||
s = new Stats({store_data: false, buckets: [1, 5, 10, 15, 20, 25, 30, 35]}); | ||
s.push_tuple([1, 1, 3, 4]); | ||
s.push_tuple([2, 1, 5, 8]); | ||
s.push_tuple([3, 1, 4, 9]); | ||
s.push_tuple([1, 1, 13, 14]); | ||
console.log(s.amean(), s.median()); | ||
console.log(s.distribution()); | ||
s.remove_tuple([1, 1, 3, 4]); | ||
s.push_tuple([4, 1, 3, 3]); | ||
console.log(s.amean(), s.median()); | ||
console.log(s.distribution()); | ||
} |
{ | ||
"name" : "fast-stats", | ||
"version" : "0.0.1", | ||
"description" : "Quickly calculate common statistics on lists of numbers", | ||
"keywords" : ["statistics", "statistic", "gauss", "lognormal", "normal", "mean", "median", "mode", "standard deviation", "margin of error", "iqr", "quartile", "inter quartile range"], | ||
"homepage" : "https://github.com/bluesmoon/node-faststats", | ||
"author" : "Philip Tellis <philip@bluesmoon.info> (http://bluesmoon.info/)", | ||
"main" : "faststats.js", | ||
"repository" : { "type": "git", "url": "git://github.com/bluesmoon/node-faststats.git" } | ||
"name": "fast-stats", | ||
"version": "0.0.2", | ||
"description": "Quickly calculate common statistics on lists of numbers", | ||
"keywords": [ | ||
"statistics", | ||
"statistic", | ||
"gauss", | ||
"lognormal", | ||
"normal", | ||
"mean", | ||
"median", | ||
"mode", | ||
"standard deviation", | ||
"margin of error", | ||
"iqr", | ||
"quartile", | ||
"inter quartile range" | ||
], | ||
"homepage": "https://github.com/bluesmoon/node-faststats", | ||
"author": "Philip Tellis <philip@bluesmoon.info> (http://bluesmoon.info/)", | ||
"main": "faststats.js", | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/bluesmoon/node-faststats.git" | ||
}, | ||
"dependencies": {}, | ||
"devDependencies": {}, | ||
"engines": { | ||
"node": "*" | ||
} | ||
} | ||
187
README.md
@@ -31,3 +31,3 @@ Fast Statistics | ||
var s = new Stats(1, 2, 3, 10, 8, 4, 3); | ||
var s = new Stats().push(1, 2, 3, 10, 8, 4, 3); | ||
console.log(s.amean().toFixed(2)); | ||
@@ -48,2 +48,51 @@ // 4.43 | ||
### Configuring the Stats object | ||
The `Stats` constructor takes in a configuration object as a parameter. This is a simple key-value list that tells | ||
`fast-stats` how to behave under certain conditions. | ||
```javascript | ||
var s = new Stats({ bucket_precision: 10 }); | ||
``` | ||
The following configuration options are recognised. All of them are optional. | ||
* `bucket_precision`: *[number]* Tells `fast-stats` to maintain a histogram of your dataset using this parameter as the least | ||
count, or precision. | ||
This is useful if you have a very large data set, and want to approximate percentile values like the median | ||
without having to store the entire dataset in memory. For example, if you had a million time measurements | ||
between 0.5 and 1.5 seconds, you could store all million of them, or you could set up 1000 one millisecond | ||
buckets and store a count of items in each bucket with a precision of 1 millisecond each. If you reduce (higher | ||
values are considered less precise) the precision to 10 milliseconds, the number of buckets reduces from 1000 | ||
to 100, taking up less memory overall. | ||
By default, `fast-stats` will not maintain buckets since it does not know the least count and range of your | ||
dataset in advance. | ||
This option is required if you need to use the `distribution()` method. | ||
* `buckets`: *[array of numbers]* Tells `fast-stats` to maintain a histogram of your dataset using these custom buckets. | ||
Each number in the array is the upper limit of a bucket. The lower limit of the first bucket is 0, the lower limit | ||
for all other buckets is the upper limit of the previous bucket. | ||
If you use both `bucket_precision` and `buckets`, `buckets` takes precedence. | ||
* `store_data`: *[boolean]* When set to `false`, tells `fast-stats` not to store actual data values. This is useful to reduce memory utilisation | ||
for large datasets, however it comes with a few caveats. | ||
1. You can no longer get an exact median or other percentile value out of your dataset, however you could | ||
use bucketing (see `bucket_precision` above) to get an approximate percentile value. | ||
2. You can no longer run an exact `iqr` filter or a `band_pass` filter on the data, however you could use | ||
bucketing to get an approximate filtered object. | ||
3. You can no longer get at the entire dataset or remove data from the dataset. | ||
The mean, standard deviation and margin of error calculations are unaffected by this parameter. If you use | ||
bucketing, and only care about the mean, standard deviation and margin of error or an approximate median or | ||
percentile value, set this option to false. | ||
By default, `store_data` is `true`. | ||
### Getting data in and out | ||
@@ -53,9 +102,11 @@ | ||
The `Stats` constructor looks a lot like an array in the way you add and remove data to its ends, however there is | ||
no direct access to individual elements. The constructor takes in multiple values or a single list of values. All | ||
values must be numbers and behaviour is undefined if they are not. | ||
The `Stats` object looks a lot like an array in the way you add and remove data to its ends, however there is | ||
no direct access to individual elements. Data is added to the object using the `push()` and `unshift()` methods. | ||
All values must be numbers and behaviour is undefined if they are not. | ||
Additionally, the `push()` method may take in a list of values that will be added to the end of the current list and | ||
the `unshift()` method may take in a list of values that will be added to the beginning of the list. | ||
The `push()` method takes in a list of values that will be added to the end of the current list and | ||
the `unshift()` method takes in a list of values that will be added to the beginning of the list. | ||
Instead of passing in multiple parameters, you can also pass in an array of numbers as the first parameter. | ||
The following are equivalent. | ||
@@ -65,5 +116,5 @@ | ||
var s1, s2, s3, s4; | ||
s1 = new Stats(1, 2, 3, 10, 8, 4, 3); | ||
s1 = new Stats().push(1, 2, 3, 10, 8, 4, 3); | ||
s2 = new Stats([1, 2, 3, 10, 8, 4, 3]); | ||
s2 = new Stats().push([1, 2, 3, 10, 8, 4, 3]); | ||
@@ -89,3 +140,3 @@ s3 = new Stats(); | ||
The `push()` and `unshift()` methods return the new length of the object. | ||
The `push()` and `unshift()` methods return the `this` object. | ||
@@ -127,5 +178,12 @@ #### Removing data | ||
assert.equal(s1.length, s2.length); | ||
assert.equal(s3.length, s4.length); | ||
``` | ||
Additionally, the `copy()` method can create a new `Stats` object with a different configuration. | ||
This is most useful if you need to change bucket sizes or precision. Simply pass the new config | ||
object as a parameter to the `copy()` method: | ||
```javascript | ||
s4 = s3.copy({store_data: false, bucket_precision: 10 }); | ||
``` | ||
### Summaries & Averages | ||
@@ -172,2 +230,5 @@ | ||
If your Stats object is configured to use buckets and has `store_data` set to false, then the median will be an approximation | ||
of the actual median. | ||
#### Any Percentile | ||
@@ -193,2 +254,5 @@ | ||
If your Stats object is configured to use buckets and has `store_data` set to false, then the percentile value returned will | ||
be an approximation of the actual percentile based on the configured `bucket_precision` or `buckets`. | ||
#### Range | ||
@@ -206,4 +270,100 @@ | ||
``` | ||
#### Distribution | ||
The `distribution()` method tells you how your data is distributed. You need to set the `bucket_precision` or `buckets` | ||
configuration options if you plan on using this method. It will then split your data into buckets based on the value of | ||
`bucket_precision` or `buckets` and tell you how many data points fall into each bucket. You can use this to plot a | ||
histogram of your data, or to compare it to commonly known distribution functions. | ||
The return value is a sparse array of buckets with counts of datapoints per bucket. To save on memory, any empty buckets | ||
are undefined. You should treat an undefined bucket as if it had 0 datapoints. | ||
A bucket structure looks like this: | ||
```javascript | ||
{ | ||
bucket: <bucket midpoint>, | ||
range: [<bucket low>, <bucket high>], | ||
count: <number of datapoints> | ||
} | ||
``` | ||
Note that the upper bound of the `range` is open, i.e., the range does not include the upper bound. | ||
```javascript | ||
var s7 = new Stats({bucket_precision: 10}); | ||
// Populate s7 with sequence of squares from 0-10 | ||
// 0 1 4 9 16 25 36 49 64 81 100 | ||
for(var i=0; i<=10; i++) | ||
s7.push(i*i); | ||
// distribution should be [4, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1] | ||
// but 0s are undefined to save on memory | ||
var d=s7.distribution(); | ||
// length should be one more than (max-min)/bucket_precision | ||
assert.equal(d.length, 11); | ||
d.forEach(function(e) { | ||
switch(e.bucket) { | ||
case 5: assert.equal(e.count, 4); // 0 1 4 9 | ||
break; | ||
case 15: assert.equal(e.count, 1); // 16 | ||
break; | ||
case 25: assert.equal(e.count, 1); // 25 | ||
break; | ||
case 35: assert.equal(e.count, 1); // 36 | ||
break; | ||
case 45: assert.equal(e.count, 1); // 49 | ||
break; | ||
case 55: assert.equal(e.count, 0); | ||
break; | ||
case 65: assert.equal(e.count, 1); // 64 | ||
break; | ||
case 75: assert.equal(e.count, 0); | ||
break; | ||
case 85: assert.equal(e.count, 1); // 81 | ||
break; | ||
case 95: assert.equal(e.count, 0); | ||
break; | ||
case 105: assert.equal(e.count, 1); // 100 | ||
break; | ||
default: assert.fail(e.bucket, "", "", "Unexpected bucket"); | ||
} | ||
}); | ||
``` | ||
Using custom buckets instead: | ||
```javascript | ||
var assert = require('assert'), | ||
Stats = require('fast-stats').Stats; | ||
var s1 = new Stats({buckets: [1, 2, 3, 5, 8, 13]}); | ||
for(var i=0; i<20; i++) | ||
s1.push(i); | ||
var d = s1.distribution(); | ||
d.forEach(function(e) { | ||
switch(e.bucket) { | ||
case 0.5: assert.equal(e.count, 1); // 0 | ||
break; | ||
case 1.5: assert.equal(e.count, 1); // 1 | ||
break; | ||
case 2.5: assert.equal(e.count, 1); // 2 | ||
break; | ||
case 4: assert.equal(e.count, 2); // 3, 4 | ||
break; | ||
case 6.5: assert.equal(e.count, 3); // 5, 6, 7 | ||
break; | ||
case 10.5: assert.equal(e.count, 5); // 8, 9, 10, 11, 12 | ||
break; | ||
case 16: assert.equal(e.count, 7); // 13, 14, 15, 16, 17, 18, 19 | ||
break; | ||
default: assert.fail(e.bucket, "", "", "Unexpected bucket"); | ||
} | ||
}); | ||
``` | ||
### Data Accuracy | ||
@@ -269,2 +429,9 @@ | ||
Note that if your Stats object is configured to use buckets and has `store_data` set to false, then all filtering | ||
will be done on an approximation of the data based on the configured value of `bucket_precision`. For example, | ||
if you have a set of numbers from 1-100 with `bucket_precision` set to 1, then filtering the dataset between 55 | ||
and 85 will get you a dataset between 55 and 85. If instead, `bucket_precision` is set to 10, then the filtered | ||
dataset will approximately range from 50 to 90. Note, however, that the `range()` method will attempt to match as | ||
closely as possible the real range. | ||
#### Band-pass filtering | ||
@@ -271,0 +438,0 @@ |
@@ -5,5 +5,5 @@ var assert = require('assert'), | ||
var s1, s2, s3, s4; | ||
s1 = new Stats(1, 2, 3, 10, 8, 4, 3); | ||
s1 = new Stats().push(1, 2, 3, 10, 8, 4, 3); | ||
s2 = new Stats([1, 2, 3, 10, 8, 4, 3]); | ||
s2 = new Stats().push([1, 2, 3, 10, 8, 4, 3]); | ||
@@ -37,3 +37,3 @@ s3 = new Stats(); | ||
assert.equal(s1.length, s2.length); | ||
assert.equal(s3.length, s4.length); | ||
@@ -92,1 +92,52 @@ var a = s1.amean(); | ||
assert.equal(r[1], 10); | ||
var s7 = new Stats({bucket_precision: 10}); | ||
// Populate s7 with sequence of squares from 0-10 | ||
// 0 1 4 9 16 25 36 49 64 81 100 | ||
for(var i=0; i<=10; i++) | ||
s7.push(i*i); | ||
// distribution should be [4, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1] | ||
// but 0s are undefined to save on memory | ||
var d=s7.distribution(); | ||
// length should be one more than (max-min)/bucket_precision | ||
assert.equal(d.length, 11); | ||
d.forEach(function(e) { | ||
switch(e.bucket) { | ||
case 5: assert.equal(e.count, 4); // 0 1 4 9 | ||
break; | ||
case 15: assert.equal(e.count, 1); // 16 | ||
break; | ||
case 25: assert.equal(e.count, 1); // 25 | ||
break; | ||
case 35: assert.equal(e.count, 1); // 36 | ||
break; | ||
case 45: assert.equal(e.count, 1); // 49 | ||
break; | ||
case 55: assert.equal(e.count, 0); | ||
break; | ||
case 65: assert.equal(e.count, 1); // 64 | ||
break; | ||
case 75: assert.equal(e.count, 0); | ||
break; | ||
case 85: assert.equal(e.count, 1); // 81 | ||
break; | ||
case 95: assert.equal(e.count, 0); | ||
break; | ||
case 105: assert.equal(e.count, 1); // 100 | ||
break; | ||
default: assert.fail(e.bucket, "", "", "Unexpected bucket"); | ||
} | ||
}); | ||
var s8 = new Stats({store_data: false, bucket_precision: 2}).push(1, 2, 3, 10, 8, 4, 3); | ||
var s9 = s8.copy() | ||
assert.equal(s9.length, s8.length); | ||
assert.equal(s9.buckets.length, s8.buckets.length); | ||
assert.equal(s9.amean(), s8.amean()); | ||
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
New author
Supply chain riskA new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
Non-existent author
Supply chain riskThe package was published by an npm account that no longer exists.
Found 1 instance in 1 package
63107
686
487
0
11
2