Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

ml-kmeans

Package Overview
Dependencies
Maintainers
7
Versions
14
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

ml-kmeans - npm Package Compare versions

Comparing version 2.0.0 to 3.0.0

src/KMeansResult.js

20

History.md

@@ -0,1 +1,21 @@

<a name="3.0.0"></a>
# [3.0.0](https://github.com/mljs/kmeans/compare/v2.0.0...v3.0.0) (2016-10-07)
### Bug Fixes
* remove unneeded array creations in updateClusterID ([b2957d5](https://github.com/mljs/kmeans/commit/b2957d5))
### Features
* accepts distance function as parameter ([68993a1](https://github.com/mljs/kmeans/commit/68993a1))
* allows to compute a new array of points their cluster id ([254f9a9](https://github.com/mljs/kmeans/commit/254f9a9))
* return a generator in the withIterations option ([3b4c7c6](https://github.com/mljs/kmeans/commit/3b4c7c6))
* return the number of iteration done ([75fd42e](https://github.com/mljs/kmeans/commit/75fd42e))
* returns the error and the size of each cluster ([e85aab7](https://github.com/mljs/kmeans/commit/e85aab7))
* when maxIterations is 0, iterates until converge ([220c828](https://github.com/mljs/kmeans/commit/220c828))
<a name="2.0.0"></a>

@@ -2,0 +22,0 @@ # [2.0.0](https://github.com/mljs/kmeans/compare/v1.0.0...v2.0.0) (2016-09-27)

13

package.json
{
"name": "ml-kmeans",
"version": "2.0.0",
"version": "3.0.0",
"description": "K-means in Javascript",
"main": "src/kmeans.js",
"directories": {
"lib": "src",
"test": "test"
},
"files": [
"src",
"tonic.js"
],
"scripts": {

@@ -49,4 +49,5 @@ "eslint": "eslint src test",

"RandomSelection": "^1.0.1",
"ml-distance-euclidean": "^1.0.0"
"ml-distance-euclidean": "^1.0.0",
"ml-nearest-vector": "^1.0.0"
}
}

@@ -1,2 +0,2 @@

# clustering
# kmeans

@@ -9,4 +9,6 @@ [![NPM version][npm-image]][npm-url]

[K-means](https://en.wikipedia.org/wiki/K-means_clustering) in JavaScript
> [K-means clustering](https://en.wikipedia.org/wiki/K-means_clustering) in JavaScript
K-means clustering aims to partition n observations into k clusters in which each observation belongs to the cluster with the nearest mean.
## Installation

@@ -18,2 +20,24 @@

## Example
```js
const kmeans = require('ml-kmeans');
let data = [[1, 1, 1], [1, 2, 1], [-1, -1, -1], [-1, -1, -1.5]];
let centers = [[1, 2, 1], [-1, -1, -1]];
let ans = kmeans(data, 2, {initialization: centers});
console.log(ans);
/*
KMeansResult {
clusters: [ 0, 0, 1, 1 ],
centroids:
[ { centroid: [ 1, 1.5, 1 ], error: 0.25, size: 2 },
{ centroid: [ -1, -1, -1.25 ], error: 0.0625, size: 2 } ],
converged: true,
iterations: 1
}
*/
```
## Test

@@ -20,0 +44,0 @@

'use strict';
const distance = require('ml-distance-euclidean');
const Picker = require('RandomSelection').Picker;

@@ -28,5 +27,6 @@

* @param {Number} K - Number of clusters
* @param {Array<Array<Number>>} distanceMatrix - matrix with the distance values
* @return {Array<Array<Number>>} - Initial random points
*/
function mostDistant(data, K) {
function mostDistant(data, K, distanceMatrix) {
var ans = new Array(K);

@@ -37,18 +37,2 @@

// calculate distance matrix
var distanceMatrix = new Array(data.length);
for (var i = 0; i < data.length; ++i) {
for (var j = i; j < data.length; ++j) {
if (!distanceMatrix[i]) {
distanceMatrix[i] = new Array(data.length);
}
if (!distanceMatrix[j]) {
distanceMatrix[j] = new Array(data.length);
}
const dist = distance(data[i], data[j]);
distanceMatrix[i][j] = dist;
distanceMatrix[j][i] = dist;
}
}
if (K > 1) {

@@ -55,0 +39,0 @@ // chooses the more distant point

@@ -5,2 +5,4 @@ 'use strict';

const init = require('./initialization');
const KMeansResult = require('./KMeansResult');
const squaredDistance = require('ml-distance-euclidean').squared;

@@ -11,6 +13,46 @@ const defaultOptions = {

withIterations: false,
initialization: 'mostDistant'
initialization: 'mostDistant',
distanceFunction: squaredDistance
};
/**
* Each step operation for kmeans
* @ignore
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...]
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster
* @param {Array<Number>} clusterID - the cluster identifier for each data dot
* @param {Number} K - Number of clusters
* @param {Object} [options] - Option object
* @param {Number} iterations - Current number of iterations
* @return {KMeansResult}
*/
function step(centers, data, clusterID, K, options, iterations) {
clusterID = utils.updateClusterID(data, centers, clusterID, options.distanceFunction);
var newCenters = utils.updateCenters(data, clusterID, K);
var converged = utils.converged(newCenters, centers, options.distanceFunction, options.tolerance);
return new KMeansResult(clusterID, newCenters, converged, iterations, options.distanceFunction);
}
/**
* Generator version for the algorithm
* @ignore
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...]
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster
* @param {Array<Number>} clusterID - the cluster identifier for each data dot
* @param {Number} K - Number of clusters
* @param {Object} [options] - Option object
*/
function* kmeansGenerator(centers, data, clusterID, K, options) {
var converged = false;
var stepNumber = 0;
var stepResult;
while (!converged && (stepNumber < options.maxIterations)) {
stepResult = step(centers, data, clusterID, K, options, ++stepNumber);
yield stepResult.computeInformation(data);
converged = stepResult.converged;
centers = stepResult.centroids;
}
}
/**
* K-means algorithm

@@ -23,9 +65,10 @@ * @param {Array<Array<Number>>} data - Points in the format to cluster [x,y,z,...]

* @param {Boolean} [options.withIterations = false] - Store clusters and centroids for each iteration
* @param {Function} [options.distanceFunction = squaredDistance] - Distance function to use between the points
* @param {String|Array<Array<Number>>} [options.initialization = 'moreDistant'] - K centers in format [x,y,z,...] or a method for initialize the data:
* * `'random'` will choose K random different values.
* * `'mostDistant'` will choose the more distant points to a first random pick
* @returns {Object} - Cluster identifier for each data dot and centroids with the following fields:
* @returns {KMeansResult} - Cluster identifier for each data dot and centroids with the following fields:
* * `'clusters'`: Array of indexes for the clusters.
* * `'centroids'`: Array with the resulting centroids.
* * `'iterations'`: Array with the state of 'clusters' and 'centroids' for each iteration.
* * `'iterations'`: Number of iterations that took to converge
*/

@@ -35,4 +78,4 @@ function kmeans(data, K, options) {

if (K > data.length) {
throw new Error('Data length should be bigger than K');
if (K <= 0 || K > data.length || !Number.isInteger(K)) {
throw new Error('K should be a positive integer bigger than the number of points');
}

@@ -53,3 +96,3 @@

case 'mostDistant':
centers = init.mostDistant(data, K);
centers = init.mostDistant(data, K, utils.calculateDistanceMatrix(data, options.distanceFunction));
break;

@@ -61,49 +104,20 @@ default:

var clusterID = new Array(data.length);
for (var i = 0; i < data.length; ++i) {
clusterID[i] = 0;
// infinite loop until convergence
if (options.maxIterations === 0) {
options.maxIterations = Number.MAX_VALUE;
}
var lastDistance = 1e100;
var curDistance = 0;
var iterations = [];
for (var iter = 0; iter < options.maxIterations; ++iter) {
clusterID = utils.updateClusterID(data, centers);
centers = utils.updateCenters(data, clusterID, K);
curDistance = utils.computeSSE(data, centers, clusterID);
if (options.withIterations) {
iterations.push({
'clusters': clusterID,
'centroids': centers
});
}
if ((lastDistance - curDistance < options.tolerance) || ((lastDistance - curDistance) / lastDistance < options.tolerance)) {
if (options.withIterations) {
return {
'clusters': clusterID,
'centroids': centers,
'iterations': iterations
};
} else {
return {
'clusters': clusterID,
'centroids': centers
};
}
}
lastDistance = curDistance;
}
// exceed number of iterations
var clusterID = new Array(data.length);
if (options.withIterations) {
return {
'clusters': clusterID,
'centroids': centers,
'iterations': iterations
};
return kmeansGenerator(centers, data, clusterID, K, options);
} else {
return {
'clusters': clusterID,
'centroids': centers
};
var converged = false;
var stepNumber = 0;
var stepResult;
while (!converged && (stepNumber < options.maxIterations)) {
stepResult = step(centers, data, clusterID, K, options, ++stepNumber);
converged = stepResult.converged;
centers = stepResult.centroids;
}
return stepResult.computeInformation(data);
}

@@ -110,0 +124,0 @@ }

'use strict';
const squaredDistance = require('ml-distance-euclidean').squared;
const nearestVector = require('ml-nearest-vector');
/**
* Calculates the sum of squared errors
* Calculates the distance matrix for a given array of points
* @ignore
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...]
* @param {Array<Number>} clusterID - the cluster identifier for each data dot
* @returns {Number} the sum of squared errors
* @param {Function} distance - Distance function to use between the points
* @return {Array<Array<Number>>} - matrix with the distance values
*/
function computeSSE(data, centers, clusterID) {
var sse = 0;
var c = 0;
for (var i = 0; i < data.length; i++) {
c = clusterID[i];
sse += squaredDistance(data[i], centers[c]);
function calculateDistanceMatrix(data, distance) {
var distanceMatrix = new Array(data.length);
for (var i = 0; i < data.length; ++i) {
for (var j = i; j < data.length; ++j) {
if (!distanceMatrix[i]) {
distanceMatrix[i] = new Array(data.length);
}
if (!distanceMatrix[j]) {
distanceMatrix[j] = new Array(data.length);
}
const dist = distance(data[i], data[j]);
distanceMatrix[i][j] = dist;
distanceMatrix[j][i] = dist;
}
}
return sse;
return distanceMatrix;
}

@@ -28,26 +35,9 @@

* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...]
* @param {Array <Number>} clusterID - the cluster identifier for each data dot
* @param {Function} distance - Distance function to use between the points
* @returns {Array} the cluster identifier for each data dot
*/
function updateClusterID(data, centers) {
const k = centers.length;
var aux = 0;
var clusterID = new Array(data.length);
for (var index = 0; index < data.length; index++)
clusterID[index] = 0;
var d = new Array(data.length);
function updateClusterID(data, centers, clusterID, distance) {
for (var i = 0; i < data.length; i++) {
d[i] = new Array(k);
for (var j = 0; j < k; j++) {
aux = squaredDistance(data[i], centers[j]);
d[i][j] = [aux, j];
}
var min = d[i][0][0];
var id = 0;
for (var l = 0; l < k; l++) {
if (d[i][l][0] < min) {
min = d[i][l][0];
id = d[i][l][1];
}
}
clusterID[i] = id;
clusterID[i] = nearestVector(centers, data[i], {distanceFunction: distance});
}

@@ -67,5 +57,9 @@ return clusterID;

const nDim = data[0].length;
// creates empty centers with 0 size
var centers = new Array(K);
var centersLen = new Array(K);
for (var i = 0; i < K; i++) {
centers[i] = new Array(nDim);
centersLen[i] = 0;
for (var j = 0; j < nDim; j++) {

@@ -76,21 +70,14 @@ centers[i][j] = 0;

for (var k = 0; k < K; k++) {
var cluster = [];
for (var l = 0; l < data.length; l++) {
if (clusterID[l] === k) {
cluster.push(data[l]);
}
// add the value for all dimensions of the point
for (var l = 0; l < data.length; l++) {
centersLen[clusterID[l]]++;
for (var dim = 0; dim < nDim; dim++) {
centers[clusterID[l]][dim] += data[l][dim];
}
}
// divides by length
for (var id = 0; id < K; id++) {
for (var d = 0; d < nDim; d++) {
var x = [];
for (var m = 0; m < data.length; m++) {
if (clusterID[m] === k) {
x.push(data[m][d]);
}
}
var sum = 0;
for (var n = 0; n < x.length; n++) {
sum += x[n];
}
centers[k][d] = sum / x.length;
centers[id][d] /= centersLen[id];
}

@@ -101,4 +88,23 @@ }

exports.computeSSE = computeSSE;
/**
* The centers have moved more than the tolerance value?
* @ignore
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...]
* @param {Array<Array<Number>>} oldCenters - the K old centers in format [x,y,z,...]
* @param {Function} distanceFunction - Distance function to use between the points
* @param {Number} tolerance - Allowed distance for the centroids to move
* @return {boolean}
*/
function converged(centers, oldCenters, distanceFunction, tolerance) {
for (var i = 0; i < centers.length; i++) {
if (distanceFunction(centers[i], oldCenters[i]) > tolerance) {
return false;
}
}
return true;
}
exports.updateClusterID = updateClusterID;
exports.updateCenters = updateCenters;
exports.calculateDistanceMatrix = calculateDistanceMatrix;
exports.converged = converged;
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc