gaussianMixture
Advanced tools
Comparing version 0.7.0 to 0.8.0
353
index.js
@@ -9,5 +9,6 @@ 'use strict'; | ||
var MAX_ITERATIONS = 200; | ||
var LOG_LIKELIHOOD_TOL = 1e-7; | ||
var EPSILON = 1e-7; | ||
module.exports = GMM; | ||
module.exports.Histogram = Histogram; | ||
@@ -23,2 +24,3 @@ /** | ||
* 1 meaning that the prior has equal weight as the result of the optimal GMM in each EM step, 0 meaning no influence, and Infinity means a fixed variance (resp. separation). | ||
* @return {GMM} a gmm object | ||
* @example var gmm = new GMM(3, [0.3, 0.2, 0.5], [1, 2, 3], [1, 1, 0.5]); | ||
@@ -88,2 +90,24 @@ */ | ||
/** @private
 * Given a histogram, determine the membership for each bin and for each component of the GMM.
 * @param {Histogram} h histogram representing the data to find memberships for.
 * @param {Array} gaussians (optional) an Array of length nComponents that contains the gaussians for the GMM
 * @return {Object} a hash from bin key to memberships, where values are {Array} of length nComponents that sum to 1.
 */
GMM.prototype._membershipsHistogram = function (h, gaussians) {
  var componentGaussians = gaussians || this._gaussians();
  var memberships = {};
  Object.keys(h.counts).forEach(function (key) {
    // One membership vector per bin, evaluated at the bin's representative value.
    memberships[key] = this.membership(h.value(key), componentGaussians);
  }, this);
  return memberships;
};
/** | ||
@@ -108,3 +132,3 @@ * Given a datapoint, determine its memberships for each component of the GMM. | ||
/** | ||
/** @private | ||
* Perform one expectation-maximization step and update the GMM weights, means and variances in place. | ||
@@ -115,3 +139,3 @@ * Optionally, if options.variancePrior and options.priorRelevance are defined, mix in the prior. | ||
*/ | ||
GMM.prototype.updateModel = function (data, memberships) { | ||
GMM.prototype._updateModel = function (data, memberships) { | ||
// First, we compute the data memberships. | ||
@@ -125,3 +149,3 @@ var n = data.length; | ||
var reduceFunction = function (k) { return function (a, b) { return (a + b[k]); }; }; | ||
for (var k = 0; k < this.nComponents; k++) { | ||
for (let k = 0; k < this.nComponents; k++) { | ||
componentWeights[k] = memberships.reduce(reduceFunction(k), 0); | ||
@@ -132,5 +156,5 @@ } | ||
// Update the mixture means | ||
for (k = 0; k < this.nComponents; k++) { | ||
for (let k = 0; k < this.nComponents; k++) { | ||
this.means[k] = 0; | ||
for (var i = 0; i < n; i++) { | ||
for (let i = 0; i < n; i++) { | ||
this.means[k] += memberships[i][k] * data[i]; | ||
@@ -146,3 +170,3 @@ } | ||
var center = GMM._barycenter(this.means, this.weights); | ||
for (k = 0; k < this.nComponents; k++) { | ||
for (let k = 0; k < this.nComponents; k++) { | ||
alpha = this.weights[k] / (this.weights[k] + this.options.separationPriorRelevance); | ||
@@ -154,5 +178,5 @@ this.means[k] = center + alpha * (this.means[k] - center) + (1 - alpha) * (priorMeans[k] - priorCenter); | ||
// Update the mixture variances | ||
for (k = 0; k < this.nComponents; k++) { | ||
this.vars[k] = LOG_LIKELIHOOD_TOL; // initialize to some epsilon to avoid zero variance problems. | ||
for (i = 0; i < n; i++) { | ||
for (let k = 0; k < this.nComponents; k++) { | ||
this.vars[k] = EPSILON; // initialize to some epsilon to avoid zero variance problems. | ||
for (let i = 0; i < n; i++) { | ||
this.vars[k] += memberships[i][k] * (data[i] - this.means[k]) * (data[i] - this.means[k]); | ||
@@ -170,13 +194,89 @@ } | ||
/** @private
 * Perform one expectation-maximization step on a histogram and update the GMM
 * weights, means and variances in place.
 * Optionally, if options.variancePrior and options.variancePriorRelevance are
 * defined, mix in the variance prior; likewise options.separationPrior and
 * options.separationPriorRelevance for the means.
 * @param {Histogram} h histogram representing the data.
 * @param {Array} memberships the memberships object for the given histogram (optional).
 */
GMM.prototype._updateModelHistogram = function (h, memberships) {
  // First, we compute the data memberships.
  var n = h.total;
  if (!memberships) memberships = this._membershipsHistogram(h);
  var alpha;
  var keys = Object.keys(h.counts);
  // Update the mixture weights:
  // componentWeights[k] is the soft count of observations assigned to component k,
  // i.e. each bin's membership weighted by the bin's count.
  var componentWeights = [];
  var reduceFunction = function (k) { return function (a, b) { return (a + memberships[b][k] * h.counts[b]); }; };
  for (let k = 0; k < this.nComponents; k++) {
    componentWeights[k] = keys.reduce(reduceFunction(k), 0);
  }
  this.weights = componentWeights.map(function (a) { return a / n; });
  // Update the mixture means: membership-weighted average of bin values.
  for (let k = 0; k < this.nComponents; k++) {
    this.means[k] = 0;
    for (let i = 0; i < keys.length; i++) {
      let key = keys[i];
      let v = h.value(key);
      this.means[k] += memberships[key][k] * v * h.counts[key];
    }
    this.means[k] /= componentWeights[k];
  }
  // If there is a separation prior: pull each mean towards an evenly spaced
  // layout (multiples of separationPrior), keeping the barycenter fixed.
  if (this.options.separationPrior && this.options.separationPriorRelevance) {
    var separationPrior = this.options.separationPrior;
    // NOTE(review): `_` is presumably lodash/underscore (_.range(n) -> [0..n-1]) — confirm the import.
    var priorMeans = _.range(this.nComponents).map(function (a) { return (a * separationPrior); });
    var priorCenter = GMM._barycenter(priorMeans, this.weights);
    var center = GMM._barycenter(this.means, this.weights);
    for (let k = 0; k < this.nComponents; k++) {
      // alpha -> 1 when the component's weight dominates the prior relevance,
      // so heavy components keep their empirical mean.
      alpha = this.weights[k] / (this.weights[k] + this.options.separationPriorRelevance);
      this.means[k] = center + alpha * (this.means[k] - center) + (1 - alpha) * (priorMeans[k] - priorCenter);
    }
  }
  // Update the mixture variances
  for (let k = 0; k < this.nComponents; k++) {
    this.vars[k] = EPSILON; // initialize to some epsilon to avoid zero variance problems.
    for (let i = 0; i < keys.length; i++) {
      let key = keys[i];
      let v = h.value(key);
      this.vars[k] += memberships[key][k] * (v - this.means[k]) * (v - this.means[k]) * h.counts[key];
    }
    this.vars[k] /= componentWeights[k];
    // If there is a variance prior: blend the empirical variance with the prior,
    // weighted by the component's responsibility mass.
    if (this.options.variancePrior && this.options.variancePriorRelevance) {
      alpha = this.weights[k] / (this.weights[k] + this.options.variancePriorRelevance);
      this.vars[k] = alpha * this.vars[k] + (1 - alpha) * this.options.variancePrior;
    }
  }
};
/**
 * Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given data.
 * Dispatches on the data representation: raw sample Array vs Histogram.
 * @param {(Array|Histogram)} data the data array or histogram
 * @return {Number} the log-likelihood
 * @throws {Error} if data is neither an Array nor a Histogram
 */
GMM.prototype.logLikelihood = function (data) {
  if (Array.isArray(data)) return this._logLikelihood(data);
  if (Histogram.prototype.isPrototypeOf(data)) return this._logLikelihoodHistogram(data);
  throw new Error('Data must be an Array or a Histogram.');
};
/** @private | ||
* Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given an array of data. | ||
* @param {Array} data the data array | ||
* @return {Number} the log-likelihood | ||
*/ | ||
GMM.prototype._logLikelihood = function (data) { | ||
var l = 0; | ||
var p = 0; | ||
for (var i = 0, n = memberships.length; i < n; i++) { | ||
var gaussians = this._gaussians(); | ||
for (var i = 0, n = data.length; i < n; i++) { | ||
p = 0; | ||
for (var k = 0; k < this.nComponents; k++) { | ||
p += this.weights[k] * memberships[i][k]; | ||
p += this.weights[k] * gaussians[k].pdf(data[i]); | ||
} | ||
@@ -192,19 +292,47 @@ if (p === 0) { | ||
/** | ||
* Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given an array of data. | ||
* @param {Array} data the data array | ||
/** @private
 * Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given a histogram.
 * @param {Histogram} h the data histogram
 * @return {Number} the log-likelihood
 */
GMM.prototype._logLikelihoodHistogram = function (h) {
  var l = 0;
  var p = 0;
  var gaussians = this._gaussians();
  var keys = Object.keys(h.counts);
  for (var i = 0, n = keys.length; i < n; i++) {
    p = 0;
    let key = keys[i];
    let v = h.value(key);
    for (var k = 0; k < this.nComponents; k++) {
      // Mixture density evaluated at the bin's representative value.
      p += this.weights[k] * gaussians[k].pdf(v);
    }
    if (p === 0) {
      // A zero-density bin makes the whole likelihood zero.
      return -Infinity;
    } else {
      // Each bin contributes its log-density once per counted observation.
      l += Math.log(p) * h.counts[key];
    }
  }
  return l;
};
/** @private | ||
* Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given an array of memberships. | ||
* @param {Array} memberships the memberships array, matrix of size n * this.nComponents, where n is the size of the data. | ||
* @return {Number} the log-likelihood | ||
*/ | ||
GMM.prototype._logLikelihoodMemberships = function (memberships) { | ||
var l = 0; | ||
for (var i = 0, n = memberships.length; i < n; i++) { | ||
var p = 0; | ||
for (var k = 0; k < this.nComponents; k++) { | ||
p += this.weights[k] * memberships[i][k]; | ||
} | ||
if (p === 0) { | ||
return -Infinity; | ||
} else { | ||
l += Math.log(p); | ||
@@ -221,2 +349,21 @@ } | ||
* The initialization is agnostic to the other priors that the options might contain. | ||
* The `initialize` flag is unavailable with the histogram version of this function | ||
* @param {(Array|Histogram)} data the data array or histogram | ||
* @param {Number} [maxIterations=200] maximum number of expectation-maximization steps | ||
* @param {Number} [logLikelihoodTol=0.0000001] tolerance for the log-likelihood | ||
* to determine if we reached the optimum | ||
* @return {Number} the number of steps to reach the converged solution | ||
*/ | ||
GMM.prototype.optimize = function (data, maxIterations, logLikelihoodTol) {
  // Dispatch on the data representation: raw sample Array vs Histogram.
  if (Array.isArray(data)) return this._optimize(data, maxIterations, logLikelihoodTol);
  if (Histogram.prototype.isPrototypeOf(data)) return this._optimizeHistogram(data, maxIterations, logLikelihoodTol);
  throw new Error('Data must be an Array or a Histogram.');
};
/** @private | ||
* Compute the optimal GMM components given an array of data. | ||
* If options has a true flag for `initialize`, the optimization will begin with a K-means++ initialization. | ||
* This allows to have a data-dependent initialization and should converge quicker and to a better model. | ||
* The initialization is agnostic to the other priors that the options might contain. | ||
* @param {Array} data array of numbers representing the samples to use to optimize the model | ||
@@ -228,3 +375,3 @@ * @param {Number} [maxIterations=200] maximum number of expectation-maximization steps | ||
* @example | ||
var gmm = new GMM(3, undefined, [1, 5, 10], {initialize: true}); | ||
var gmm = new GMM(3, undefined, [1, 5, 10], [1, 1, 1], {initialize: true}); | ||
var data = [1.2, 1.3, 7.4, 1.4, 14.3, 15.3, 1.0, 7.2]; | ||
@@ -234,7 +381,7 @@ gmm.optimize(data); // updates weights, means and variances with the EM algorithm given the data. | ||
*/ | ||
GMM.prototype.optimize = function (data, maxIterations, logLikelihoodTol) { | ||
GMM.prototype._optimize = function (data, maxIterations, logLikelihoodTol) { | ||
if (this.options.initialize) this.initialize(data); | ||
maxIterations = maxIterations === undefined ? MAX_ITERATIONS : maxIterations; | ||
logLikelihoodTol = logLikelihoodTol === undefined ? LOG_LIKELIHOOD_TOL : logLikelihoodTol; | ||
logLikelihoodTol = logLikelihoodTol === undefined ? EPSILON : logLikelihoodTol; | ||
var logLikelihoodDiff = Infinity; | ||
@@ -245,3 +392,3 @@ var logLikelihood = -Infinity; | ||
for (var i = 0; i < maxIterations && logLikelihoodDiff > logLikelihoodTol; i++) { | ||
this.updateModel(data, memberships); | ||
this._updateModel(data, memberships); | ||
memberships = this.memberships(data); | ||
@@ -255,2 +402,32 @@ temp = this._logLikelihoodMemberships(memberships); | ||
/** @private
 * Compute the optimal GMM components given a histogram of data.
 * K-means++ initialization is not implemented for the histogram version of this function.
 * @param {Histogram} h histogram of data used to optimize the model
 * @param {Number} [maxIterations=200] maximum number of expectation-maximization steps
 * @param {Number} [logLikelihoodTol=0.0000001] tolerance for the log-likelihood
 * to determine if we reached the optimum
 * @return {Number} the number of steps to reach the converged solution
 * @example
 var gmm = new GMM(3, undefined, [1, 5, 10], [1, 1, 1]);
 var h = Histogram.fromData([1.2, 1.3, 7.4, 1.4, 14.3, 15.3, 1.0, 7.2]);
 gmm.optimize(h); // updates weights, means and variances with the EM algorithm given the data.
 console.log(gmm.means); // >> [1.225, 7.3, 14.8]
 */
GMM.prototype._optimizeHistogram = function (h, maxIterations, logLikelihoodTol) {
  if (maxIterations === undefined) maxIterations = MAX_ITERATIONS;
  if (logLikelihoodTol === undefined) logLikelihoodTol = EPSILON;
  var previousLogLikelihood = -Infinity;
  var delta = Infinity;
  var steps = 0;
  // Run EM steps until the log-likelihood change falls within tolerance
  // or we hit the iteration cap.
  while (steps < maxIterations && delta > logLikelihoodTol) {
    this._updateModelHistogram(h);
    var currentLogLikelihood = this._logLikelihoodHistogram(h);
    delta = Math.abs(previousLogLikelihood - currentLogLikelihood);
    previousLogLikelihood = currentLogLikelihood;
    steps += 1;
  }
  return steps;
};
/** | ||
@@ -359,1 +536,127 @@ * Initialize the GMM given data with the [K-means++](https://en.wikipedia.org/wiki/K-means%2B%2B) initialization algorithm. | ||
}; | ||
/**
 * Instantiate a new Histogram.
 * @param {Object} [h={}] an object with keys 'counts' and 'bins'. Both are optional.
 * An observation x will be counted for the key i if bins[i][0] <= x < bins[i][1].
 * If bins are not specified, the bins will be corresponding to one unit in the scale of the data.
 * The keys of the 'counts' hash will be stringified integers.
 * @return {Histogram} a histogram object.
 * It has keys 'bins' (possibly null) and 'counts'.
 * @example var h = new Histogram({counts: {'a': 3, 'b': 2, 'c': 5}, bins: {'a': [0, 2], 'b': [2, 4], 'c': [4, 7]}});
 * @example var h = new Histogram({counts: {'1': 3, '2': 2, '3': 5}});
 * @example var h = new Histogram();
 */
function Histogram(h) {
  h = h || {};
  this.bins = h.bins || null;
  this.counts = h.counts || {};
  // Compute the total from `this` (whose counts default to {}) rather than the
  // raw argument, so that a missing 'counts' key (e.g. `new Histogram()`) does
  // not make Object.keys(undefined) throw inside Histogram._total.
  this.total = Histogram._total(this);
}

/** @private
 * Get the key corresponding to a single element.
 * @param {Number} x observation to classify in the histogram
 * @param {Object} [bins=undefined] a map from key to range (a range being an array of two elements)
 * An observation x will be counted for the key i if `bins[i][0] <= x < bins[i][1]`.
 * If not specified, the bins will be corresponding to one unit in the scale of the data.
 * @return {String} the key to add the observation in the histogram, or null if no bin matches
 */
Histogram._classify = function (x, bins) {
  // Without explicit bins, each unit-wide bucket is keyed by the nearest integer.
  if (bins === null || bins === undefined) return Math.round(x).toString();
  var keys = Object.keys(bins);
  for (var i = 0, n = keys.length; i < n; i++) {
    var bounds = bins[keys[i]];
    // Half-open interval: lower bound inclusive, upper bound exclusive.
    if (bounds[0] <= x && x < bounds[1]) return keys[i];
  }
  return null;
};

/** @private
 * Get the total count for the histogram.
 * @param {Histogram} h a histogram
 * @return {Number} the total count for the histogram
 */
Histogram._total = function (h) {
  return Object.keys(h.counts)
    .map(function (k) { return h.counts[k]; })
    .reduce(function (a, b) { return a + b; }, 0);
};

/**
 * Add an observation to a histogram.
 * Observations that do not fall in any bin are silently discarded.
 * @param {Number} x observation to add to the histogram
 * @return {Histogram} the histogram with added value.
 */
Histogram.prototype.add = function (x) {
  var c = Histogram._classify(x, this.bins);
  if (c !== null) {
    if (!this.counts[c]) this.counts[c] = 1;
    else this.counts[c] += 1;
    this.total += 1;
  }
  return this;
};

/**
 * Return a data array from a histogram.
 * @return {Array} an array of observations derived from the histogram counts,
 * where each bin contributes its representative value `counts[key]` times.
 */
Histogram.prototype.flatten = function () {
  var r = [];
  var keys = Object.keys(this.counts);
  for (var i = 0, n = keys.length; i < n; i++) {
    var k = keys[i];
    var v = this.value(k);
    for (var j = 0; j < this.counts[k]; j++) {
      r.push(v);
    }
  }
  return r;
};

/**
 * Instantiate a new Histogram from raw data.
 * @param {Array} [data=[]] array of observations to include in the histogram.
 * Observations that do not correspond to any bin will be discarded.
 * @param {Object} [bins={}] a map from key to range (a range being an array of two elements)
 * An observation x will be counted for the key i if `bins[i][0] <= x < bins[i][1]`.
 * If not specified, the bins will be corresponding to one unit in the scale of the data.
 * @return {Histogram} a histogram object
 * It has keys 'bins' (possibly null) and 'counts'.
 * @example var h = Histogram.fromData([1, 2, 2, 2, 5, 5], {A: [0, 1], B: [1, 5], C: [5, 10]});
 * // {bins: {A: [0, 1], B: [1, 5], C: [5, 10]}, counts: {B: 4, C: 2}}
 * @example var h = Histogram.fromData([1, 2, 2, 2, 2.4, 2.5, 5, 5]);
 * // {counts: {'1': 1, '2': 4, '3': 1, '5': 2}}
 */
Histogram.fromData = function (data, bins) {
  // Honor the documented [data=[]] default instead of throwing on undefined.
  if (data === null || data === undefined) data = [];
  var h = new Histogram({bins: bins, counts: {}});
  for (var i = 0, n = data.length; i < n; i++) {
    h.add(data[i]);
  }
  return h;
};

/**
 * Return the midpoint value for the given key, derived from the bins.
 * @param {String} key the bin key to look up
 * @return {Number} the value for the provided key.
 * @throws {Error} if bins are defined but the key has no bin.
 */
Histogram.prototype.value = function (key) {
  if (this.bins) {
    if (this.bins[key]) return (this.bins[key][0] + this.bins[key][1]) / 2;
    else throw new Error('No bin for this key.');
  } else {
    // Without explicit bins, keys are stringified integers: the value is the key itself.
    return Number(key);
  }
};
{ | ||
"name": "gaussianMixture", | ||
"version": "0.7.0", | ||
"version": "0.8.0", | ||
"description": "An implementation of a Gaussian Mixture class in one dimension, that allows to fit models with an Expectation Maximization algorithm.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
139
README.md
@@ -24,3 +24,3 @@ # Gaussian Mixture | ||
[index.js:24-35](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L24-L35 "Source code on GitHub") | ||
[index.js:26-37](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L26-L37 "Source code on GitHub") | ||
@@ -45,5 +45,7 @@ Instantiate a new GMM. | ||
Returns **[GMM](#gmm)** a gmm object | ||
## sample | ||
[index.js:55-69](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L55-L69 "Source code on GitHub") | ||
[index.js:57-71](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L57-L71 "Source code on GitHub") | ||
@@ -60,3 +62,3 @@ Randomly sample from the GMM's distribution. | ||
[index.js:77-84](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L77-L84 "Source code on GitHub") | ||
[index.js:79-86](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L79-L86 "Source code on GitHub") | ||
@@ -74,3 +76,3 @@ Given an array of data, determine their memberships for each component of the GMM. | ||
[index.js:92-103](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L92-L103 "Source code on GitHub") | ||
[index.js:116-127](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L116-L127 "Source code on GitHub") | ||
@@ -86,23 +88,11 @@ Given a datapoint, determine its memberships for each component of the GMM. | ||
## updateModel | ||
[index.js:111-158](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L111-L158 "Source code on GitHub") | ||
Perform one expectation-maximization step and update the GMM weights, means and variances in place. | ||
Optionally, if options.variancePrior and options.priorRelevance are defined, mix in the prior. | ||
**Parameters** | ||
- `data` **[Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array)** array of numbers representing the samples to use to update the model | ||
- `memberships` **[Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array)** the memberships array for the given data (optional). | ||
## logLikelihood | ||
[index.js:187-203](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L187-L203 "Source code on GitHub") | ||
[index.js:252-257](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L252-L257 "Source code on GitHub") | ||
Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given an array of data. | ||
Compute the [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function#Log-likelihood) for the GMM given data. | ||
**Parameters** | ||
- `data` **[Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array)** the data array | ||
- `data` **([Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array) \| [Histogram](#histogram))** the data array or histogram | ||
@@ -113,3 +103,3 @@ Returns **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)** the log-likelihood | ||
[index.js:221-238](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L221-L238 "Source code on GitHub") | ||
[index.js:345-350](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L345-L350 "Source code on GitHub") | ||
@@ -120,6 +110,7 @@ Compute the optimal GMM components given an array of data. | ||
The initialization is agnostic to the other priors that the options might contain. | ||
The `initialize` flag is unavailable with the histogram version of this function | ||
**Parameters** | ||
- `data` **[Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array)** array of numbers representing the samples to use to optimize the model | ||
- `data` **([Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array) \| [Histogram](#histogram))** the data array or histogram | ||
- `maxIterations` **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)?** maximum number of expectation-maximization steps (optional, default `200`) | ||
@@ -129,11 +120,2 @@ - `logLikelihoodTol` **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)?** tolerance for the log-likelihood | ||
**Examples** | ||
```javascript | ||
var gmm = new GMM(3, undefined, [1, 5, 10], {initialize: true}); | ||
var data = [1.2, 1.3, 7.4, 1.4, 14.3, 15.3, 1.0, 7.2]; | ||
gmm.optimize(data); // updates weights, means and variances with the EM algorithm given the data. | ||
console.log(gmm.means); // >> [1.225, 7.3, 14.8] | ||
``` | ||
Returns **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)** the number of steps to reach the converged solution | ||
@@ -143,3 +125,3 @@ | ||
[index.js:251-293](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L251-L293 "Source code on GitHub") | ||
[index.js:428-470](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L428-L470 "Source code on GitHub") | ||
@@ -166,3 +148,3 @@ Initialize the GMM given data with the [K-means++](https://en.wikipedia.org/wiki/K-means%2B%2B) initialization algorithm. | ||
[index.js:315-322](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L315-L322 "Source code on GitHub") | ||
[index.js:492-499](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L492-L499 "Source code on GitHub") | ||
@@ -175,3 +157,3 @@ Return the model for the GMM as a raw JavaScript Object. | ||
[index.js:334-342](https://github.com/benjamintd/gaussian-mixture/blob/a6bb0a3f969eae8f65b443cfaa64faa00c34fb16/index.js#L334-L342 "Source code on GitHub") | ||
[index.js:511-519](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L511-L519 "Source code on GitHub") | ||
@@ -197,1 +179,92 @@ Instantiate a GMM from an Object model and options. | ||
Returns **[GMM](#gmm)** the GMM corresponding to the given model | ||
# Histogram | ||
[index.js:533-538](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L533-L538 "Source code on GitHub") | ||
Instantiate a new Histogram. | ||
**Parameters** | ||
- `h` **[Object](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object)?** an object with keys 'counts' and 'bins'. Both are optional. | ||
An observation x will be counted for the key i if bins[i][0] <= x < bins[i][1]. | ||
If bins are not specified, the bins will be corresponding to one unit in the scale of the data. | ||
The keys of the 'counts' hash will be stringified integers. (optional, default `{}`) | ||
**Examples** | ||
```javascript | ||
var h = new Histogram({counts: {'a': 3, 'b': 2, 'c': 5}, bins: {'a': [0, 2], 'b': [2, 4], 'c': [4, 7]}}); | ||
``` | ||
```javascript | ||
var h = new Histogram({counts: {'1': 3, '2': 2, '3': 5}}); | ||
``` | ||
```javascript | ||
var h = new Histogram(); | ||
``` | ||
Returns **[Histogram](#histogram)** a histogram object. | ||
It has keys 'bins' (possibly null) and 'counts'. | ||
## add | ||
[index.js:577-587](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L577-L587 "Source code on GitHub") | ||
Add an observation to an histogram. | ||
**Parameters** | ||
- `x` **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)** observation to add to the histogram | ||
Returns **[Histogram](#histogram)** the histogram with added value. | ||
## flatten | ||
[index.js:593-608](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L593-L608 "Source code on GitHub") | ||
Return a data array from a histogram. | ||
Returns **[Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array)** an array of observations derived from the histogram counts. | ||
## value | ||
[index.js:638-645](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L638-L645 "Source code on GitHub") | ||
Return the median value for the given key, derived from the bins. | ||
**Parameters** | ||
- `key` | ||
Returns **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)** the value for the provided key. | ||
## fromData | ||
[index.js:624-632](https://github.com/benjamintd/gaussian-mixture/blob/dbacb0ace6f5cff11cfdbbaaeb3ccce867fcba45/index.js#L624-L632 "Source code on GitHub") | ||
Instantiate a new Histogram. | ||
**Parameters** | ||
- `data` **[Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array)?** array of observations to include in the histogram. | ||
Observations that do not correspond to any bin will be discarded. (optional, default `[]`) | ||
- `bins` **[Object](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object)?** a map from key to range (a range being an array of two elements) | ||
An observation x will be counted for the key i if `bins[i][0] <= x < bins[i][1]`. | ||
If not specified, the bins will be corresponding to one unit in the scale of the data. (optional, default `{}`) | ||
**Examples** | ||
```javascript | ||
var h = Histogram.fromData([1, 2, 2, 2, 5, 5], {A: [0, 1], B: [1, 5], C: [5, 10]}); | ||
// {bins: {A: [0, 1], B: [1, 5], C: [5, 10]}, counts: {A: 0, B: 4, C: 2}} | ||
``` | ||
```javascript | ||
var h = Histogram.fromData([1, 2, 2, 2, 2.4, 2.5, 5, 5]); | ||
// {counts: {'1': 1, '2': 4, '3': 1, '5': 2}} | ||
``` | ||
Returns **[Histogram](#histogram)** a histogram object | ||
It has keys 'bins' (possibly null) and 'counts'. |
@@ -7,2 +7,3 @@ 'use strict'; | ||
var GMM = require('../index'); | ||
var Histogram = require('../index').Histogram; | ||
@@ -65,3 +66,3 @@ test('Initialization of a new GMM object.', function (t) { | ||
for (var i = 0; i < 200; i++) { | ||
testGmm.updateModel(data); | ||
testGmm._updateModel(data); | ||
} | ||
@@ -82,3 +83,3 @@ for (var j = 0; j < 3; j++) { | ||
for (var i = 0; i < 15; i++) { | ||
gmm.updateModel(data); | ||
gmm._updateModel(data); | ||
temp = gmm.logLikelihood(data); | ||
@@ -97,3 +98,3 @@ t.equals(temp - l >= -1e-5, true); | ||
for (var i = 0; i < 200; i++) { | ||
gmm.updateModel(data); | ||
gmm._updateModel(data); | ||
} | ||
@@ -209,1 +210,110 @@ var counter = gmm2.optimize(data); | ||
}); | ||
test('memberships - histogram', function (t) { | ||
var h = Histogram.fromData([1, 2, 5, 5.4, 5.5, 6, 7, 7]); | ||
var gmm = GMM.fromModel({ | ||
means: [1, 5, 7], | ||
vars: [2, 2, 2], | ||
weights: [0.3, 0.5, 0.2], | ||
nComponents: 3 | ||
}); | ||
t.same(gmm._membershipsHistogram(h), { | ||
1: [0.9818947940807183, 0.01798403047511045, 0.00012117544417123207], | ||
2: [0.8788782427321509, 0.11894323591065209, 0.0021785213571970234], | ||
5: [0.013212886953789417, 0.7213991842739687, 0.265387928772242], | ||
6: [0.0012378419366357771, 0.49938107903168216, 0.49938107903168216], | ||
7: [0.00009021165708731931, 0.268917159718714, 0.7309926286241988] | ||
}); | ||
t.end(); | ||
}); | ||
test('log likelihood - histogram', function (t) { | ||
var h = Histogram.fromData([1, 2, 5, 5, 5, 6, 7, 7]); | ||
var gmm = GMM.fromModel({ | ||
means: [1, 5, 7], | ||
vars: [2, 2, 2], | ||
weights: [0.3, 0.5, 0.2], | ||
nComponents: 3 | ||
}); | ||
t.equal(gmm.logLikelihood(h), gmm.logLikelihood([1, 2, 5, 5, 5, 6, 7, 7])); | ||
t.end(); | ||
}); | ||
test('optimize - histogram', function (t) { | ||
var h = Histogram.fromData([1, 2, 5, 5, 5, 6, 7, 7]); | ||
var gmm = GMM.fromModel({ | ||
means: [1, 5, 7], | ||
vars: [2, 2, 2], | ||
weights: [0.3, 0.5, 0.2], | ||
nComponents: 3 | ||
}); | ||
var gmm2 = GMM.fromModel({ | ||
means: [1, 5, 7], | ||
vars: [2, 2, 2], | ||
weights: [0.3, 0.5, 0.2], | ||
nComponents: 3 | ||
}); | ||
gmm._optimizeHistogram(h); | ||
gmm2._optimize([1, 2, 5, 5, 5, 6, 7, 7]); | ||
var round = x => Number(x.toFixed(5)); | ||
t.same(gmm.model().means.map(round), gmm2.model().means.map(round)); | ||
t.same(gmm.model().vars.map(round), gmm2.model().vars.map(round)); | ||
t.same(gmm.model().weights.map(round), gmm2.model().weights.map(round)); | ||
var options = { | ||
variancePrior: 3, | ||
variancePriorRelevance: 0.5, | ||
separationPrior: 3, | ||
separationPriorRelevance: 1 | ||
}; | ||
gmm.options = options; | ||
gmm2.options = options; | ||
gmm.optimize(h); | ||
gmm2._optimize([1, 2, 5, 5, 5, 6, 7, 7]); | ||
t.same(gmm.model().means.map(round), gmm2.model().means.map(round)); | ||
t.same(gmm.model().vars.map(round), gmm2.model().vars.map(round)); | ||
t.same(gmm.model().weights.map(round), gmm2.model().weights.map(round)); | ||
t.end(); | ||
}); | ||
test('histogram total', function (t) { | ||
var d = [1, 2, 3, 4, 5, 5, 6, 6, 6]; | ||
var h = Histogram.fromData(d); | ||
t.equals(Histogram._total(h), 9); | ||
t.end(); | ||
}); | ||
test('histogram classify', function (t) { | ||
t.equals(Histogram._classify(3.4), '3'); | ||
t.equals(Histogram._classify(3.4, {'A': [1, 2], 'B': [3, 3.4], 'C': [3.4, 5], 'D': [5, 6]}), 'C'); | ||
t.same(Histogram._classify(7, {'A': [1, 2], 'B': [3, 3.4], 'C': [3.4, 5], 'D': [5, 6]}), null); | ||
t.end(); | ||
}); | ||
test('histogram value', function (t) { | ||
var h = new Histogram({ | ||
bins: {'A': [1, 2], 'B': [3, 3.4], 'C': [3.4, 5], 'D': [5, 6]}, | ||
counts: {'A': 5, 'B': 3} | ||
}); | ||
t.equals(h.value('A'), 1.5); | ||
t.equals(h.value('B'), 3.2); | ||
t.throws(() => h.value('E')); | ||
t.end(); | ||
}); | ||
test('histogram flatten', function (t) { | ||
var h = new Histogram({ | ||
bins: {'A': [1, 2], 'B': [3, 3.4], 'C': [3.4, 5], 'D': [5, 6]}, | ||
counts: {'A': 3, 'B': 2} | ||
}); | ||
t.same(h.flatten(), [1.5, 1.5, 1.5, 3.2, 3.2]); | ||
t.end(); | ||
}); |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
50801
835
259
0