gaussianMixture
Comparing version 0.1.0 to 0.2.0
index.js
@@ -1,14 +1,14 @@
-'use strict'
+'use strict';
 
 // Imports
-var gaussian = require('gaussian')
-var _ = require('underscore')
+var gaussian = require('gaussian');
+var _ = require('underscore');
 
 // Constants
-var MAX_ITERATIONS = 200
-var LOG_LIKELIHOOD_TOL = 1e-7
+var MAX_ITERATIONS = 200;
+var LOG_LIKELIHOOD_TOL = 1e-7;
 
-module.exports = GMM
+module.exports = GMM;
 
-function GMM (nComponents, weights, means, vars) {
+function GMM(nComponents, weights, means, vars, options) {
   // nComponents: number of components in the mixture
@@ -18,20 +18,21 @@ // weights: array of weights of each component in the mixture, must sum to 1
   // vars: array of variances of each component
-  this.nComponents = nComponents
-  this.weights = weights === undefined ? _.range(nComponents).map(function () { return 1 / nComponents }) : weights
-  this.means = means === undefined ? _.range(nComponents) : means
-  this.vars = vars === undefined ? _.range(nComponents).map(function () { return 1 }) : vars
+  this.nComponents = nComponents;
+  this.weights = weights === undefined ? _.range(nComponents).map(function () { return 1 / nComponents; }) : weights;
+  this.means = means === undefined ? _.range(nComponents) : means;
+  this.vars = vars === undefined ? _.range(nComponents).map(function () { return 1; }) : vars;
   if (nComponents !== this.weights.length ||
-    nComponents !== this.means.length ||
-    nComponents !== this.vars.length) {
-    throw new Error('weights, means and vars must have nComponents elements.')
+      nComponents !== this.means.length ||
+      nComponents !== this.vars.length) {
+    throw new Error('weights, means and vars must have nComponents elements.');
   }
+  this.options = options === undefined ? {} : options;
 }
 
 GMM.prototype._gaussians = function () {
-  var gaussians = []
+  var gaussians = [];
   for (var k = 0; k < this.nComponents; k++) {
-    gaussians.push(gaussian(this.means[k], this.vars[k]))
+    gaussians.push(gaussian(this.means[k], this.vars[k]));
   }
-  return gaussians
-}
+  return gaussians;
+};
@@ -41,16 +42,16 @@ GMM.prototype.sample = function (nSamples) {
   // return: array of length nSamples
-  var samples = []
-  var gaussians = this._gaussians()
+  var samples = [];
+  var gaussians = this._gaussians();
   // generate samples
   for (var i = 0; i < nSamples; i++) {
-    var r = Math.random()
-    var n = 0
+    var r = Math.random();
+    var n = 0;
     while (r > this.weights[n] && n < this.nComponents) {
-      r -= this.weights[n]
-      n++
+      r -= this.weights[n];
+      n++;
     }
-    samples.push(gaussians[n].ppf(Math.random()))
+    samples.push(gaussians[n].ppf(Math.random()));
   }
-  return samples
-}
+  return samples;
+};
@@ -60,8 +61,8 @@ GMM.prototype.memberships = function (data) {
   // return: (data.length * this.nComponents) matrix with membership weights
-  var memberships = []
+  var memberships = [];
   for (var i = 0, n = data.length; i < n; i++) {
-    memberships.push(this.membership(data[i]))
+    memberships.push(this.membership(data[i]));
   }
-  return memberships
-}
+  return memberships;
+};
@@ -71,10 +72,10 @@ GMM.prototype.membership = function (x) {
   // return: array of probabilities of length this.nComponents
-  var membership = []
-  var gaussians = this._gaussians()
+  var membership = [];
+  var gaussians = this._gaussians();
   for (var i = 0; i < this.nComponents; i++) {
-    membership.push(gaussians[i].pdf(x))
+    membership.push(gaussians[i].pdf(x));
   }
-  var sum = membership.reduce(function (a, b) { return a + b })
-  return membership.map(function (a) { return a / sum })
-}
+  var sum = membership.reduce(function (a, b) { return a + b; });
+  return membership.map(function (a) { return a / sum; });
+};
@@ -84,21 +85,24 @@ GMM.prototype.updateModel = function (data) {
   // It will update the GMM weights, means and variances.
+  // Optionally, if options.variancePrior and options.priorRelevance are defined,
+  // mix in the prior.
   // First, we compute the data memberships.
-  var n = data.length
-  var memberships = this.memberships(data)
+  var n = data.length;
+  var memberships = this.memberships(data);
 
   // Update the mixture weights
-  var componentWeights = []
+  var componentWeights = [];
+  var reduceFunction = function (k) { return function (a, b) { return (a + b[k]); }; };
   for (var k = 0; k < this.nComponents; k++) {
-    componentWeights[k] = memberships.reduce(function (a, b) { return (a + b[k]) }, 0)
+    componentWeights[k] = memberships.reduce(reduceFunction(k), 0);
   }
-  this.weights = componentWeights.map(function (a) { return a / n })
+  this.weights = componentWeights.map(function (a) { return a / n; });
 
   // Update the mixture means
   for (k = 0; k < this.nComponents; k++) {
-    this.means[k] = 0
+    this.means[k] = 0;
     for (var i = 0; i < n; i++) {
-      this.means[k] += memberships[i][k] * data[i]
+      this.means[k] += memberships[i][k] * data[i];
     }
-    this.means[k] /= componentWeights[k]
+    this.means[k] /= componentWeights[k];
   }
@@ -108,39 +112,44 @@
   for (k = 0; k < this.nComponents; k++) {
-    this.vars[k] = 0
+    this.vars[k] = 0;
     for (i = 0; i < n; i++) {
-      this.vars[k] += memberships[i][k] * Math.pow(data[i] - this.means[k], 2)
+      this.vars[k] += memberships[i][k] * Math.pow(data[i] - this.means[k], 2);
     }
-    this.vars[k] /= componentWeights[k]
+    this.vars[k] /= componentWeights[k];
+    // If there is a variance prior:
+    if (this.options.variancePrior && this.options.priorRelevance) {
+      var alpha = this.weights[k] / (this.weights[k] + this.options.priorRelevance);
+      this.vars[k] = alpha * this.vars[k] + (1 - alpha) * this.options.variancePrior;
+    }
   }
-}
+};
 
 GMM.prototype.logLikelihood = function (data) {
   // data: array of floats representing the samples to compute the log-likelihood with.
-  var l = 0
-  var p = 0
-  var gaussians = this._gaussians()
+  var l = 0;
+  var p = 0;
+  var gaussians = this._gaussians();
   for (var i = 0, n = data.length; i < n; i++) {
-    p = 0
+    p = 0;
     for (var k = 0; k < this.nComponents; k++) {
-      p += this.weights[k] * gaussians[k].pdf(data[i])
+      p += this.weights[k] * gaussians[k].pdf(data[i]);
     }
-    l += Math.log(p)
+    l += Math.log(p);
   }
-  return l
-}
+  return l;
+};
 
 GMM.prototype.optimize = function (data, maxIterations, logLikelihoodTol) {
   // data: array of floats
-  maxIterations = maxIterations === undefined ? MAX_ITERATIONS : maxIterations
-  logLikelihoodTol = logLikelihoodTol === undefined ? LOG_LIKELIHOOD_TOL : logLikelihoodTol
-  var logLikelihoodDiff = Infinity
-  var logLikelihood = -Infinity
-  var temp
+  maxIterations = maxIterations === undefined ? MAX_ITERATIONS : maxIterations;
+  logLikelihoodTol = logLikelihoodTol === undefined ? LOG_LIKELIHOOD_TOL : logLikelihoodTol;
+  var logLikelihoodDiff = Infinity;
+  var logLikelihood = -Infinity;
+  var temp;
   for (var i = 0; i < maxIterations && logLikelihoodDiff > logLikelihoodTol; i++) {
-    this.updateModel(data)
-    temp = this.logLikelihood(data)
-    logLikelihoodDiff = Math.abs(logLikelihood - temp)
-    logLikelihood = temp
+    this.updateModel(data);
+    temp = this.logLikelihood(data);
+    logLikelihoodDiff = Math.abs(logLikelihood - temp);
+    logLikelihood = temp;
   }
-  return i // number of steps to reach the converged solution.
-}
+  return i; // number of steps to reach the converged solution.
+};
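The main functional change in this release is the optional variance prior applied during the maximization step, shown in the `updateModel` hunk above. As a rough sketch of that mixing rule only (the function and variable names below are illustrative, not part of the package's API):

```js
// Sketch of the variance-prior mixing introduced in 0.2.0's updateModel.
// `mlVariance` stands for the maximum-likelihood variance estimate of a
// component, `weight` for its mixture weight after the E-step; both names
// are stand-ins chosen for this example.
function mixVariancePrior(mlVariance, weight, variancePrior, priorRelevance) {
  var alpha = weight / (weight + priorRelevance);
  return alpha * mlVariance + (1 - alpha) * variancePrior;
}

// A large priorRelevance pulls the variance toward the prior;
// a small one leaves the ML estimate mostly untouched.
mixVariancePrior(1.0, 0.3, 3, 1e6);  // ≈ 3
mixVariancePrior(1.0, 0.3, 3, 0.01); // ≈ 1.06
```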
package.json

 {
   "name": "gaussianMixture",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "An implementation of a Gaussian Mixture class in one dimension, that allows to fit models with an Expectation Maximization algorithm.",
@@ -8,3 +8,5 @@ "main": "index.js",
   "scripts": {
-    "test": "standard && tap test/*.test.js",
+    "test": "npm run lint && tap test/*.test.js",
+    "lint": "eslint '*.js' 'test/*.test.js'",
+    "fix": "eslint --fix '*.js' 'test/*.test.js'",
     "cover": "tap test/*.test.js --cov --coverage-report=lcov"
@@ -21,3 +23,4 @@ },
   "devDependencies": {
-    "standard": "~7.1.2",
+    "eslint": "^3.4.0",
+    "eslint-config-mourner": "^2.0.1",
     "tap": "~6.2.0"
@@ -24,0 +27,0 @@ },
README.md

@@ -21,2 +21,20 @@ # Gaussian Mixture
 gmm.optimize(data) // updates weights, means and variances with the EM algorithm given the data.
 console.log(gmm.means); // >> [1.225, 7.3, 14.8]
 ```
+
+## Options
+
+Optionally, define an options object and pass it at instantiation:
+
+```
+var options = {
+  variancePrior: // Float,
+  priorRelevance: // Positive float.
+};
+var gmm = new GMM(nComponents, weights, means, vars, options);
+```
+
+The variance prior lets you define a prior for the variance of all components (the same prior is assumed for every component). This prior is mixed into the maximization step of the EM optimization with a relevance score equal to `options.priorRelevance`.
+The mixing weight `alpha` for component `i` is `alpha = weights[i] / (weights[i] + options.priorRelevance)`.
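For context, here is a minimal usage sketch of the new `options` parameter; the constructor arguments and expected result mirror the "Variance prior" test added in the test-suite diff below, the require name is assumed from package.json, and `data` is assumed to be an array of numbers:

```js
var GMM = require('gaussianMixture'); // assumed package name; tests use require('../index')

// Assumed: `data` is an array of 1-D samples, e.g. loaded from a JSON fixture.
var data = [/* ... samples ... */];

// A very large priorRelevance effectively pins every component's variance to
// the prior: the package's own test uses variancePrior: 3, priorRelevance: 1e6
// and expects the fitted variances to come out as [3, 3, 3].
var options = { variancePrior: 3, priorRelevance: 1000000 };
var gmm = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1], options);

gmm.optimize(data);    // EM, with the prior mixed into each M-step
console.log(gmm.vars); // close to [3, 3, 3] for data like the test fixture
```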
@@ -1,103 +1,132 @@
-var test = require('tap').test
-var GMM = require('../index')
+'use strict';
+var test = require('tap').test;
+var data = require('./fixtures/data.json'); // 200 samples from refGmm
+var GMM = require('../index');
 
 test('Initialization of a new GMM object.', function (t) {
-  t.plan(2)
+  t.plan(2);
-  function f (k) {
+  function f(k) {
     return function () {
-      var gmm = new GMM(3, k)
-      gmm.sample(0)
-    }
+      var gmm = new GMM(3, k);
+      gmm.sample(0);
+    };
   }
-  t.throws(f([1, 2]))
-  t.doesNotThrow(f([1, 2, 3]))
-})
+  t.throws(f([1, 2]));
+  t.doesNotThrow(f([1, 2, 3]));
+});
 
 test('Random sampling.', function (t) {
-  t.plan(1)
+  t.plan(1);
-  var gmm = new GMM(3)
-  t.equals(5, gmm.sample(5).length)
-})
+  var gmm = new GMM(3);
+  t.equals(5, gmm.sample(5).length);
+});
 
 test('Gaussians of a mixture model.', function (t) {
-  t.plan(6)
+  t.plan(6);
-  var gmm = new GMM(3, [1 / 3, 1 / 3, 1 / 3], [0, 10, 20], [1, 2, 0.5])
-  var gaussians = gmm._gaussians()
-  t.equals(gaussians[0].mean, 0)
-  t.equals(gaussians[1].mean, 10)
-  t.equals(gaussians[2].mean, 20)
-  t.equals(gaussians[0].variance, 1)
-  t.equals(gaussians[1].variance, 2)
-  t.equals(gaussians[2].variance, 0.5)
-})
+  var gmm = new GMM(3, [1 / 3, 1 / 3, 1 / 3], [0, 10, 20], [1, 2, 0.5]);
+  var gaussians = gmm._gaussians();
+  t.equals(gaussians[0].mean, 0);
+  t.equals(gaussians[1].mean, 10);
+  t.equals(gaussians[2].mean, 20);
+  t.equals(gaussians[0].variance, 1);
+  t.equals(gaussians[1].variance, 2);
+  t.equals(gaussians[2].variance, 0.5);
+});
 
 test('Computing membership for one datapoint', function (t) {
-  t.plan(2)
+  t.plan(2);
-  var gmm = new GMM(3, undefined, [0, 10, 20])
-  t.equals(gmm.membership(5)[0], gmm.membership(5)[1])
-  t.equals(gmm.membership(0)[0] > 0.99, true)
-})
+  var gmm = new GMM(3, undefined, [0, 10, 20]);
+  t.equals(gmm.membership(5)[0], gmm.membership(5)[1]);
+  t.equals(gmm.membership(0)[0] > 0.99, true);
+});
 
 test('Shape of the membership matrix', function (t) {
-  t.plan(2)
+  t.plan(2);
-  var gmm = new GMM(5)
-  var memberships = gmm.memberships([1, 2, 3, 4, 5, 6])
-  t.equals(memberships.length, 6)
-  t.equals(memberships[0].length, 5)
-})
+  var gmm = new GMM(5);
+  var memberships = gmm.memberships([1, 2, 3, 4, 5, 6]);
+  t.equals(memberships.length, 6);
+  t.equals(memberships[0].length, 5);
+});
 
 test('Convergence of model update', function (t) {
-  t.plan(9)
+  t.plan(9);
-  var refGmm = new GMM(3, [0.2, 0.5, 0.3], [0, 10, 30], [1, 2, 4])
-  var testGmm = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1])
-  var data = require('./fixtures/data.json') // 200 samples from refGmm
+  var refGmm = new GMM(3, [0.2, 0.5, 0.3], [0, 10, 30], [1, 2, 4]);
+  var testGmm = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1]);
   for (var i = 0; i < 200; i++) {
-    testGmm.updateModel(data)
+    testGmm.updateModel(data);
   }
   for (var j = 0; j < 3; j++) {
-    t.equals(Math.abs(testGmm.weights[j] - refGmm.weights[j]) < 0.1, true)
-    t.equals(Math.abs(testGmm.means[j] - refGmm.means[j]) < 1, true)
-    t.equals(Math.abs(testGmm.vars[j] - refGmm.vars[j]) < 1, true)
+    t.equals(Math.abs(testGmm.weights[j] - refGmm.weights[j]) < 0.1, true);
+    t.equals(Math.abs(testGmm.means[j] - refGmm.means[j]) < 1, true);
+    t.equals(Math.abs(testGmm.vars[j] - refGmm.vars[j]) < 1, true);
   }
-})
+});
 
 test('log likelihood function', function (t) {
-  t.plan(15)
+  t.plan(15);
-  var gmm = new GMM(3, undefined, [-1, 15, 37], [1, 1, 1])
-  var data = require('./fixtures/data.json') // 200 samples from another GMM
-  var l = -Infinity
-  var temp
+  var gmm = new GMM(3, undefined, [-1, 15, 37], [1, 1, 1]);
+  var l = -Infinity;
+  var temp;
   for (var i = 0; i < 15; i++) {
-    gmm.updateModel(data)
-    temp = gmm.logLikelihood(data)
-    t.equals(temp - l >= -1e-5, true)
-    l = temp
+    gmm.updateModel(data);
+    temp = gmm.logLikelihood(data);
+    t.equals(temp - l >= -1e-5, true);
+    l = temp;
   }
-})
+});
 
 test('EM full optimization', function (t) {
-  t.plan(10)
+  t.plan(10);
-  var gmm = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1])
-  var gmm2 = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1])
-  var data = require('./fixtures/data.json') // 200 samples from refGmm
+  var gmm = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1]);
+  var gmm2 = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1]);
   for (var i = 0; i < 200; i++) {
-    gmm.updateModel(data)
+    gmm.updateModel(data);
   }
-  var counter = gmm2.optimize(data)
-  t.equals(counter, 3)
+  var counter = gmm2.optimize(data);
+  t.equals(counter, 3);
   for (var j = 0; j < 3; j++) {
-    t.equals(Math.abs(gmm.weights[j] - gmm2.weights[j]) < 1e-7, true)
-    t.equals(Math.abs(gmm.means[j] - gmm2.means[j]) < 1e-7, true)
-    t.equals(Math.abs(gmm.vars[j] - gmm2.vars[j]) < 1e-7, true)
+    t.equals(Math.abs(gmm.weights[j] - gmm2.weights[j]) < 1e-7, true);
+    t.equals(Math.abs(gmm.means[j] - gmm2.means[j]) < 1e-7, true);
+    t.equals(Math.abs(gmm.vars[j] - gmm2.vars[j]) < 1e-7, true);
  }
-})
+});
+
+test('Variance prior', function (t) {
+  t.plan(3);
+  var options = {
+    variancePrior: 3,
+    priorRelevance: 0.01
+  };
+  var options2 = {
+    variancePrior: 3,
+    priorRelevance: 1
+  };
+  var options3 = {
+    variancePrior: 3,
+    priorRelevance: 1000000
+  };
+  var gmm = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1], options);
+  var gmm2 = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1], options2);
+  var gmm3 = new GMM(3, undefined, [-1, 13, 25], [1, 1, 1], options3);
+  gmm.optimize(data);
+  gmm2.optimize(data);
+  gmm3.optimize(data);
+  var cropFloat = function (a) { return Number(a.toFixed(1)); };
+  t.same(gmm.vars.map(cropFloat), [1.1, 2.5, 4.6]);
+  t.same(gmm2.vars.map(cropFloat), [2.7, 2.8, 3.4]);
+  t.same(gmm3.vars.map(cropFloat), [3, 3, 3]);
+});