Comparing version 3.0.0 to 3.0.1
@@ -0,1 +1,6 @@ | ||
<a name="3.0.1"></a> | ||
## [3.0.1](https://github.com/mljs/kmeans/compare/v3.0.0...v3.0.1) (2018-05-04) | ||
<a name="3.0.0"></a> | ||
@@ -2,0 +7,0 @@ # [3.0.0](https://github.com/mljs/kmeans/compare/v2.0.0...v3.0.0) (2016-10-07) |
{ | ||
"name": "ml-kmeans", | ||
"version": "3.0.0", | ||
"description": "K-means in Javascript", | ||
"main": "src/kmeans.js", | ||
"version": "3.0.1", | ||
"description": "K-Means clustering", | ||
"main": "kmeans.js", | ||
"module": "src/kmeans.js", | ||
"files": [ | ||
"src", | ||
"tonic.js" | ||
"kmeans.js", | ||
"runkit.js" | ||
], | ||
"scripts": { | ||
"eslint": "eslint src test", | ||
"compile": "rollup -c", | ||
"eslint": "eslint src", | ||
"eslint-fix": "npm run eslint -- --fix", | ||
"test": "npm run test-mocha && npm run eslint", | ||
"test-cov": "istanbul cover node_modules/.bin/_mocha -- --require should --reporter dot --recursive", | ||
"test-travis": "istanbul cover node_modules/.bin/_mocha --report lcovonly -- --require should --reporter dot --recursive", | ||
"test-mocha": "mocha --require should --reporter mocha-better-spec-reporter --recursive" | ||
"prepublishOnly": "npm run compile", | ||
"test": "npm run test-coverage && npm run eslint", | ||
"test-only": "jest", | ||
"test-coverage": "jest --coverage" | ||
}, | ||
@@ -37,11 +40,10 @@ "repository": { | ||
"homepage": "https://github.com/mljs/kmeans", | ||
"tonicExampleFilename": "./tonic.js", | ||
"devDependencies": { | ||
"eslint": "^3.4.0", | ||
"eslint-config-cheminfo": "^1.2.0", | ||
"eslint-plugin-no-only-tests": "^1.1.0", | ||
"istanbul": "^0.4.4", | ||
"mocha": "^3.0.2", | ||
"mocha-better-spec-reporter": "^3.0.2", | ||
"should": "^11.1.0" | ||
"babel-plugin-transform-es2015-modules-commonjs": "^6.26.0", | ||
"eslint": "^4.19.1", | ||
"eslint-config-cheminfo": "^1.17.3", | ||
"eslint-plugin-import": "^2.11.0", | ||
"eslint-plugin-jest": "^21.15.1", | ||
"jest": "^22.4.3", | ||
"rollup": "^0.58.2" | ||
}, | ||
@@ -48,0 +50,0 @@ "dependencies": { |
@@ -1,16 +0,14 @@ | ||
# kmeans | ||
# ml-kmeans | ||
[![NPM version][npm-image]][npm-url] | ||
[![build status][travis-image]][travis-url] | ||
[![Test coverage][coveralls-image]][coveralls-url] | ||
[![David deps][david-image]][david-url] | ||
[![npm download][download-image]][download-url] | ||
[![NPM version][npm-image]][npm-url] | ||
[![build status][travis-image]][travis-url] | ||
[![Test coverage][codecov-image]][codecov-url] | ||
[![npm download][download-image]][download-url] | ||
> [K-means clustering](https://en.wikipedia.org/wiki/K-means_clustering) in JavaScript | ||
[K-means clustering][] aims to partition n observations into k clusters in which | ||
each observation belongs to the cluster with the nearest mean. | ||
K-means clustering aims to partition n observations into k clusters in which each observation belongs to the cluster with the nearest mean. | ||
## Installation | ||
`npm install ml-kmeans` | ||
`npm install --save ml-kmeans` | ||
@@ -27,3 +25,3 @@ ## [API Documentation](https://mljs.github.io/kmeans/) | ||
let ans = kmeans(data, 2, {initialization: centers}); | ||
let ans = kmeans(data, 2, { initialization: centers }); | ||
console.log(ans); | ||
@@ -42,16 +40,9 @@ /* | ||
## Test | ||
```bash | ||
npm install | ||
npm test | ||
``` | ||
## Authors | ||
- [Miguel Asencio](https://github.com/maasencioh) | ||
* [Miguel Asencio](https://github.com/maasencioh) | ||
## License | ||
[MIT](./LICENSE) | ||
[MIT](./LICENSE) | ||
@@ -62,7 +53,6 @@ [npm-image]: https://img.shields.io/npm/v/ml-kmeans.svg?style=flat-square | ||
[travis-url]: https://travis-ci.org/mljs/kmeans | ||
[coveralls-image]: https://img.shields.io/coveralls/mljs/kmeans.svg?style=flat-square | ||
[coveralls-url]: https://coveralls.io/github/mljs/kmeans | ||
[david-image]: https://img.shields.io/david/mljs/kmeans.svg?style=flat-square | ||
[david-url]: https://david-dm.org/mljs/kmeans | ||
[codecov-image]: https://img.shields.io/codecov/c/github/mljs/kmeans.svg?style=flat-square | ||
[codecov-url]: https://codecov.io/github/mljs/kmeans | ||
[download-image]: https://img.shields.io/npm/dm/ml-kmeans.svg?style=flat-square | ||
[download-url]: https://npmjs.org/package/ml-kmeans | ||
[k-means clustering]: https://en.wikipedia.org/wiki/K-means_clustering |
@@ -1,4 +0,4 @@ | ||
'use strict'; | ||
import RandomSelection from 'RandomSelection'; | ||
const Picker = require('RandomSelection').Picker; | ||
const Picker = RandomSelection.Picker; | ||
@@ -8,14 +8,14 @@ /** | ||
* @ignore | ||
* @param {Array<Array<Number>>} data - Points in the format to cluster [x,y,z,...] | ||
* @param {Number} K - Number of clusters | ||
* @return {Array<Array<Number>>} - Initial random points | ||
* @param {Array<Array<number>>} data - Points in the format to cluster [x,y,z,...] | ||
* @param {number} K - number of clusters | ||
* @return {Array<Array<number>>} - Initial random points | ||
*/ | ||
function random(data, K) { | ||
const rand = new Picker(data); | ||
var ans = new Array(K); | ||
export function random(data, K) { | ||
const rand = new Picker(data); | ||
var ans = new Array(K); | ||
for (var i = 0; i < K; ++i) { | ||
ans[i] = rand.pick(); | ||
} | ||
return ans; | ||
for (var i = 0; i < K; ++i) { | ||
ans[i] = rand.pick(); | ||
} | ||
return ans; | ||
} | ||
@@ -26,55 +26,57 @@ | ||
* @ignore | ||
* @param {Array<Array<Number>>} data - Points in the format to cluster [x,y,z,...] | ||
* @param {Number} K - Number of clusters | ||
* @param {Array<Array<Number>>} distanceMatrix - matrix with the distance values | ||
* @return {Array<Array<Number>>} - Initial random points | ||
* @param {Array<Array<number>>} data - Points in the format to cluster [x,y,z,...] | ||
* @param {number} K - number of clusters | ||
* @param {Array<Array<number>>} distanceMatrix - matrix with the distance values | ||
* @return {Array<Array<number>>} - Initial random points | ||
*/ | ||
function mostDistant(data, K, distanceMatrix) { | ||
var ans = new Array(K); | ||
export function mostDistant(data, K, distanceMatrix) { | ||
var ans = new Array(K); | ||
// chooses a random point as initial cluster | ||
ans[0] = Math.floor(Math.random() * data.length); | ||
// chooses a random point as initial cluster | ||
ans[0] = Math.floor(Math.random() * data.length); | ||
if (K > 1) { | ||
// chooses the more distant point | ||
var maxDist = {dist: -1, index: -1}; | ||
for (var l = 0; l < data.length; ++l) { | ||
if (distanceMatrix[ans[0]][l] > maxDist.dist) { | ||
maxDist.dist = distanceMatrix[ans[0]][l]; | ||
maxDist.index = l; | ||
if (K > 1) { | ||
// chooses the more distant point | ||
var maxDist = { dist: -1, index: -1 }; | ||
for (var l = 0; l < data.length; ++l) { | ||
if (distanceMatrix[ans[0]][l] > maxDist.dist) { | ||
maxDist.dist = distanceMatrix[ans[0]][l]; | ||
maxDist.index = l; | ||
} | ||
} | ||
ans[1] = maxDist.index; | ||
if (K > 2) { | ||
// chooses the set of points that maximises the min distance | ||
for (var k = 2; k < K; ++k) { | ||
var center = { dist: -1, index: -1 }; | ||
for (var m = 0; m < data.length; ++m) { | ||
// minimum distance to centers | ||
var minDistCent = { dist: Number.MAX_VALUE, index: -1 }; | ||
for (var n = 0; n < k; ++n) { | ||
if ( | ||
distanceMatrix[n][m] < minDistCent.dist && | ||
ans.indexOf(m) === -1 | ||
) { | ||
minDistCent = { | ||
dist: distanceMatrix[n][m], | ||
index: m | ||
}; | ||
} | ||
} | ||
if ( | ||
minDistCent.dist !== Number.MAX_VALUE && | ||
minDistCent.dist > center.dist | ||
) { | ||
center = Object.assign({}, minDistCent); | ||
} | ||
} | ||
ans[1] = maxDist.index; | ||
if (K > 2) { | ||
// chooses the set of points that maximises the min distance | ||
for (var k = 2; k < K; ++k) { | ||
var center = {dist: -1, index: -1}; | ||
for (var m = 0; m < data.length; ++m) { | ||
// minimum distance to centers | ||
var minDistCent = {dist: Number.MAX_VALUE, index: -1}; | ||
for (var n = 0; n < k; ++n) { | ||
if (distanceMatrix[n][m] < minDistCent.dist && ans.indexOf(m) === -1) { | ||
minDistCent = { | ||
dist: distanceMatrix[n][m], | ||
index: m | ||
}; | ||
} | ||
} | ||
if (minDistCent.dist !== Number.MAX_VALUE && minDistCent.dist > center.dist) { | ||
center = Object.assign({}, minDistCent); | ||
} | ||
} | ||
ans[k] = center.index; | ||
} | ||
} | ||
ans[k] = center.index; | ||
} | ||
} | ||
} | ||
return ans.map((index) => data[index]); | ||
return ans.map((index) => data[index]); | ||
} | ||
exports.random = random; | ||
exports.mostDistant = mostDistant; |
@@ -1,14 +0,18 @@ | ||
'use strict'; | ||
import euclidean from 'ml-distance-euclidean'; | ||
const utils = require('./utils'); | ||
const init = require('./initialization'); | ||
const KMeansResult = require('./KMeansResult'); | ||
const squaredDistance = require('ml-distance-euclidean').squared; | ||
import { | ||
updateClusterID, | ||
updateCenters, | ||
hasConverged, | ||
calculateDistanceMatrix | ||
} from './utils'; | ||
import { mostDistant, random } from './initialization'; | ||
import KMeansResult from './KMeansResult'; | ||
const defaultOptions = { | ||
maxIterations: 100, | ||
tolerance: 1e-6, | ||
withIterations: false, | ||
initialization: 'mostDistant', | ||
distanceFunction: squaredDistance | ||
maxIterations: 100, | ||
tolerance: 1e-6, | ||
withIterations: false, | ||
initialization: 'mostDistant', | ||
distanceFunction: euclidean.squared | ||
}; | ||
@@ -19,15 +23,31 @@ | ||
* @ignore | ||
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...] | ||
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Array<Number>} clusterID - the cluster identifier for each data dot | ||
* @param {Number} K - Number of clusters | ||
* @param {Object} [options] - Option object | ||
* @param {Number} iterations - Current number of iterations | ||
* @param {Array<Array<number>>} centers - K centers in format [x,y,z,...] | ||
* @param {Array<Array<number>>} data - Points [x,y,z,...] to cluster | ||
* @param {Array<number>} clusterID - Cluster identifier for each data dot | ||
* @param {number} K - Number of clusters | ||
* @param {object} [options] - Option object | ||
* @param {number} iterations - Current number of iterations | ||
* @return {KMeansResult} | ||
*/ | ||
function step(centers, data, clusterID, K, options, iterations) { | ||
clusterID = utils.updateClusterID(data, centers, clusterID, options.distanceFunction); | ||
var newCenters = utils.updateCenters(data, clusterID, K); | ||
var converged = utils.converged(newCenters, centers, options.distanceFunction, options.tolerance); | ||
return new KMeansResult(clusterID, newCenters, converged, iterations, options.distanceFunction); | ||
clusterID = updateClusterID( | ||
data, | ||
centers, | ||
clusterID, | ||
options.distanceFunction | ||
); | ||
var newCenters = updateCenters(data, clusterID, K); | ||
var converged = hasConverged( | ||
newCenters, | ||
centers, | ||
options.distanceFunction, | ||
options.tolerance | ||
); | ||
return new KMeansResult( | ||
clusterID, | ||
newCenters, | ||
converged, | ||
iterations, | ||
options.distanceFunction | ||
); | ||
} | ||
@@ -38,18 +58,18 @@ | ||
* @ignore | ||
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...] | ||
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Array<Number>} clusterID - the cluster identifier for each data dot | ||
* @param {Number} K - Number of clusters | ||
* @param {Object} [options] - Option object | ||
* @param {Array<Array<number>>} centers - K centers in format [x,y,z,...] | ||
* @param {Array<Array<number>>} data - Points [x,y,z,...] to cluster | ||
* @param {Array<number>} clusterID - Cluster identifier for each data dot | ||
* @param {number} K - Number of clusters | ||
* @param {object} [options] - Option object | ||
*/ | ||
function* kmeansGenerator(centers, data, clusterID, K, options) { | ||
var converged = false; | ||
var stepNumber = 0; | ||
var stepResult; | ||
while (!converged && (stepNumber < options.maxIterations)) { | ||
stepResult = step(centers, data, clusterID, K, options, ++stepNumber); | ||
yield stepResult.computeInformation(data); | ||
converged = stepResult.converged; | ||
centers = stepResult.centroids; | ||
} | ||
var converged = false; | ||
var stepNumber = 0; | ||
var stepResult; | ||
while (!converged && stepNumber < options.maxIterations) { | ||
stepResult = step(centers, data, clusterID, K, options, ++stepNumber); | ||
yield stepResult.computeInformation(data); | ||
converged = stepResult.converged; | ||
centers = stepResult.centroids; | ||
} | ||
} | ||
@@ -59,13 +79,13 @@ | ||
* K-means algorithm | ||
* @param {Array<Array<Number>>} data - Points in the format to cluster [x,y,z,...] | ||
* @param {Number} K - Number of clusters | ||
* @param {Object} [options] - Option object | ||
* @param {Number} [options.maxIterations = 100] - Maximum of iterations allowed | ||
* @param {Number} [options.tolerance = 1e-6] - Error tolerance | ||
* @param {Boolean} [options.withIterations = false] - Store clusters and centroids for each iteration | ||
* @param {Function} [options.distanceFunction = squaredDistance] - Distance function to use between the points | ||
* @param {String|Array<Array<Number>>} [options.initialization = 'moreDistant'] - K centers in format [x,y,z,...] or a method for initialize the data: | ||
* @param {Array<Array<number>>} data - Points in the format to cluster [x,y,z,...] | ||
* @param {number} K - Number of clusters | ||
* @param {object} [options] - Option object | ||
* @param {number} [options.maxIterations = 100] - Maximum of iterations allowed | ||
* @param {number} [options.tolerance = 1e-6] - Error tolerance | ||
* @param {boolean} [options.withIterations = false] - Store clusters and centroids for each iteration | ||
* @param {function} [options.distanceFunction = squaredDistance] - Distance function to use between the points | ||
* @param {string|Array<Array<number>>} [options.initialization = 'mostDistant'] - K centers in format [x,y,z,...] or a method to initialize the data: | |
* * `'random'` will choose K random different values. | ||
* * `'mostDistant'` will choose the more distant points to a first random pick | ||
* @returns {KMeansResult} - Cluster identifier for each data dot and centroids with the following fields: | ||
* @return {KMeansResult} - Cluster identifier for each data dot and centroids with the following fields: | ||
* * `'clusters'`: Array of indexes for the clusters. | ||
@@ -75,50 +95,56 @@ * * `'centroids'`: Array with the resulting centroids. | ||
*/ | ||
function kmeans(data, K, options) { | ||
options = Object.assign({}, defaultOptions, options); | ||
export default function kmeans(data, K, options) { | ||
options = Object.assign({}, defaultOptions, options); | ||
if (K <= 0 || K > data.length || !Number.isInteger(K)) { | ||
throw new Error('K should be a positive integer bigger than the number of points'); | ||
} | ||
if (K <= 0 || K > data.length || !Number.isInteger(K)) { | ||
throw new Error( | ||
'K should be a positive integer smaller than the number of points' | ||
); | ||
} | ||
var centers; | ||
if (Array.isArray(options.initialization)) { | ||
if (options.initialization.length !== K) { | ||
throw new Error('The initial centers should have the same length as K'); | ||
} else { | ||
centers = options.initialization; | ||
} | ||
var centers; | ||
if (Array.isArray(options.initialization)) { | ||
if (options.initialization.length !== K) { | ||
throw new Error('The initial centers should have the same length as K'); | ||
} else { | ||
switch (options.initialization) { | ||
case 'random': | ||
centers = init.random(data, K); | ||
break; | ||
case 'mostDistant': | ||
centers = init.mostDistant(data, K, utils.calculateDistanceMatrix(data, options.distanceFunction)); | ||
break; | ||
default: | ||
throw new Error('Unknown initialization method: "' + options.initialization + '"'); | ||
} | ||
centers = options.initialization; | ||
} | ||
// infinite loop until convergence | ||
if (options.maxIterations === 0) { | ||
options.maxIterations = Number.MAX_VALUE; | ||
} else { | ||
switch (options.initialization) { | ||
case 'random': | ||
centers = random(data, K); | ||
break; | ||
case 'mostDistant': | ||
centers = mostDistant( | ||
data, | ||
K, | ||
calculateDistanceMatrix(data, options.distanceFunction) | ||
); | ||
break; | ||
default: | ||
throw new Error( | ||
`Unknown initialization method: "${options.initialization}"` | ||
); | ||
} | ||
} | ||
var clusterID = new Array(data.length); | ||
if (options.withIterations) { | ||
return kmeansGenerator(centers, data, clusterID, K, options); | ||
} else { | ||
var converged = false; | ||
var stepNumber = 0; | ||
var stepResult; | ||
while (!converged && (stepNumber < options.maxIterations)) { | ||
stepResult = step(centers, data, clusterID, K, options, ++stepNumber); | ||
converged = stepResult.converged; | ||
centers = stepResult.centroids; | ||
} | ||
return stepResult.computeInformation(data); | ||
// infinite loop until convergence | ||
if (options.maxIterations === 0) { | ||
options.maxIterations = Number.MAX_VALUE; | ||
} | ||
var clusterID = new Array(data.length); | ||
if (options.withIterations) { | ||
return kmeansGenerator(centers, data, clusterID, K, options); | ||
} else { | ||
var converged = false; | ||
var stepNumber = 0; | ||
var stepResult; | ||
while (!converged && stepNumber < options.maxIterations) { | ||
stepResult = step(centers, data, clusterID, K, options, ++stepNumber); | ||
converged = stepResult.converged; | ||
centers = stepResult.centroids; | ||
} | ||
return stepResult.computeInformation(data); | ||
} | ||
} | ||
module.exports = kmeans; |
@@ -1,16 +0,16 @@ | ||
'use strict'; | ||
import { updateClusterID } from './utils'; | ||
const utils = require('./utils'); | ||
const distanceSymbol = Symbol('distance'); | ||
/** | ||
* Result of the kmeans algorithm | ||
* @param {Array<Number>} clusters - the cluster identifier for each data dot | ||
* @param {Array<Array<Object>>} centroids - the K centers in format [x,y,z,...], the error and size of the cluster | ||
* @param {Boolean} converged - Converge criteria satisfied | ||
* @param {Number} iterations - Current number of iterations | ||
* @param {Function} distance - (*Private*) Distance function to use between the points | ||
* @constructor | ||
*/ | ||
function KMeansResult(clusters, centroids, converged, iterations, distance) { | ||
export default class KMeansResult { | ||
/** | ||
* Result of the kmeans algorithm | ||
* @param {Array<number>} clusters - the cluster identifier for each data dot | ||
* @param {Array<Array<object>>} centroids - the K centers in format [x,y,z,...], the error and size of the cluster | ||
* @param {boolean} converged - Converge criteria satisfied | ||
* @param {number} iterations - Current number of iterations | ||
* @param {function} distance - (*Private*) Distance function to use between the points | ||
* @constructor | ||
*/ | ||
constructor(clusters, centroids, converged, iterations, distance) { | ||
this.clusters = clusters; | ||
@@ -21,44 +21,52 @@ this.centroids = centroids; | ||
this[distanceSymbol] = distance; | ||
} | ||
} | ||
/** | ||
* Allows to compute for a new array of points their cluster id | ||
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster | ||
* @return {Array<Number>} - cluster id for each point | ||
*/ | ||
KMeansResult.prototype.nearest = function (data) { | ||
var clusterID = new Array(data.length); | ||
var centroids = this.centroids.map(function (centroid) { | ||
return centroid.centroid; | ||
/** | ||
* Allows to compute for a new array of points their cluster id | ||
* @param {Array<Array<number>>} data - the [x,y,z,...] points to cluster | ||
* @return {Array<number>} - cluster id for each point | ||
*/ | ||
nearest(data) { | ||
const clusterID = new Array(data.length); | ||
const centroids = this.centroids.map(function (centroid) { | ||
return centroid.centroid; | ||
}); | ||
return utils.updateClusterID(data, centroids, clusterID, this[distanceSymbol]); | ||
}; | ||
return updateClusterID(data, centroids, clusterID, this[distanceSymbol]); | ||
} | ||
/** | ||
* Returns a KMeansResult with the error and size of the cluster | ||
* @ignore | ||
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster | ||
* @return {KMeansResult} | ||
*/ | ||
KMeansResult.prototype.computeInformation = function (data) { | ||
/** | ||
* Returns a KMeansResult with the error and size of the cluster | ||
* @ignore | ||
* @param {Array<Array<number>>} data - the [x,y,z,...] points to cluster | ||
* @return {KMeansResult} | ||
*/ | ||
computeInformation(data) { | ||
var enrichedCentroids = this.centroids.map(function (centroid) { | ||
return { | ||
centroid: centroid, | ||
error: 0, | ||
size: 0 | ||
}; | ||
return { | ||
centroid: centroid, | ||
error: 0, | ||
size: 0 | ||
}; | ||
}); | ||
for (var i = 0; i < data.length; i++) { | ||
enrichedCentroids[this.clusters[i]].error += this[distanceSymbol](data[i], this.centroids[this.clusters[i]]); | ||
enrichedCentroids[this.clusters[i]].size++; | ||
enrichedCentroids[this.clusters[i]].error += this[distanceSymbol]( | ||
data[i], | ||
this.centroids[this.clusters[i]] | ||
); | ||
enrichedCentroids[this.clusters[i]].size++; | ||
} | ||
for (var j = 0; j < this.centroids.length; j++) { | ||
enrichedCentroids[j].error /= enrichedCentroids[j].size; | ||
enrichedCentroids[j].error /= enrichedCentroids[j].size; | ||
} | ||
return new KMeansResult(this.clusters, enrichedCentroids, this.converged, this.iterations, this[distanceSymbol]); | ||
}; | ||
module.exports = KMeansResult; | ||
return new KMeansResult( | ||
this.clusters, | ||
enrichedCentroids, | ||
this.converged, | ||
this.iterations, | ||
this[distanceSymbol] | ||
); | ||
} | ||
} |
141
src/utils.js
@@ -1,28 +0,26 @@ | ||
'use strict'; | ||
import nearestVector from 'ml-nearest-vector'; | ||
const nearestVector = require('ml-nearest-vector'); | ||
/** | ||
* Calculates the distance matrix for a given array of points | ||
* @ignore | ||
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Function} distance - Distance function to use between the points | ||
* @return {Array<Array<Number>>} - matrix with the distance values | ||
* @param {Array<Array<number>>} data - the [x,y,z,...] points to cluster | ||
* @param {function} distance - Distance function to use between the points | ||
* @return {Array<Array<number>>} - matrix with the distance values | ||
*/ | ||
function calculateDistanceMatrix(data, distance) { | ||
var distanceMatrix = new Array(data.length); | ||
for (var i = 0; i < data.length; ++i) { | ||
for (var j = i; j < data.length; ++j) { | ||
if (!distanceMatrix[i]) { | ||
distanceMatrix[i] = new Array(data.length); | ||
} | ||
if (!distanceMatrix[j]) { | ||
distanceMatrix[j] = new Array(data.length); | ||
} | ||
const dist = distance(data[i], data[j]); | ||
distanceMatrix[i][j] = dist; | ||
distanceMatrix[j][i] = dist; | ||
} | ||
export function calculateDistanceMatrix(data, distance) { | ||
var distanceMatrix = new Array(data.length); | ||
for (var i = 0; i < data.length; ++i) { | ||
for (var j = i; j < data.length; ++j) { | ||
if (!distanceMatrix[i]) { | ||
distanceMatrix[i] = new Array(data.length); | ||
} | ||
if (!distanceMatrix[j]) { | ||
distanceMatrix[j] = new Array(data.length); | ||
} | ||
const dist = distance(data[i], data[j]); | ||
distanceMatrix[i][j] = dist; | ||
distanceMatrix[j][i] = dist; | ||
} | ||
return distanceMatrix; | ||
} | ||
return distanceMatrix; | ||
} | ||
@@ -33,13 +31,15 @@ | ||
* @ignore | ||
* @param {Array<Array<Number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...] | ||
* @param {Array <Number>} clusterID - the cluster identifier for each data dot | ||
* @param {Function} distance - Distance function to use between the points | ||
* @returns {Array} the cluster identifier for each data dot | ||
* @param {Array<Array<number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Array<Array<number>>} centers - the K centers in format [x,y,z,...] | ||
* @param {Array <number>} clusterID - the cluster identifier for each data dot | ||
* @param {function} distance - Distance function to use between the points | ||
* @return {Array} the cluster identifier for each data dot | ||
*/ | ||
function updateClusterID(data, centers, clusterID, distance) { | ||
for (var i = 0; i < data.length; i++) { | ||
clusterID[i] = nearestVector(centers, data[i], {distanceFunction: distance}); | ||
} | ||
return clusterID; | ||
export function updateClusterID(data, centers, clusterID, distance) { | ||
for (var i = 0; i < data.length; i++) { | ||
clusterID[i] = nearestVector(centers, data[i], { | ||
distanceFunction: distance | ||
}); | ||
} | ||
return clusterID; | ||
} | ||
@@ -50,36 +50,36 @@ | ||
* @ignore | ||
* @param {Array <Array <Number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Array <Number>} clusterID - the cluster identifier for each data dot | ||
* @param {Number} K - Number of clusters | ||
* @returns {Array} he K centers in format [x,y,z,...] | ||
* @param {Array <Array <number>>} data - the [x,y,z,...] points to cluster | ||
* @param {Array <number>} clusterID - the cluster identifier for each data dot | ||
* @param {number} K - Number of clusters | ||
* @return {Array} the K centers in format [x,y,z,...] | |
*/ | ||
function updateCenters(data, clusterID, K) { | ||
const nDim = data[0].length; | ||
export function updateCenters(data, clusterID, K) { | ||
const nDim = data[0].length; | ||
// creates empty centers with 0 size | ||
var centers = new Array(K); | ||
var centersLen = new Array(K); | ||
for (var i = 0; i < K; i++) { | ||
centers[i] = new Array(nDim); | ||
centersLen[i] = 0; | ||
for (var j = 0; j < nDim; j++) { | ||
centers[i][j] = 0; | ||
} | ||
// creates empty centers with 0 size | ||
var centers = new Array(K); | ||
var centersLen = new Array(K); | ||
for (var i = 0; i < K; i++) { | ||
centers[i] = new Array(nDim); | ||
centersLen[i] = 0; | ||
for (var j = 0; j < nDim; j++) { | ||
centers[i][j] = 0; | ||
} | ||
} | ||
// add the value for all dimensions of the point | ||
for (var l = 0; l < data.length; l++) { | ||
centersLen[clusterID[l]]++; | ||
for (var dim = 0; dim < nDim; dim++) { | ||
centers[clusterID[l]][dim] += data[l][dim]; | ||
} | ||
// add the value for all dimensions of the point | ||
for (var l = 0; l < data.length; l++) { | ||
centersLen[clusterID[l]]++; | ||
for (var dim = 0; dim < nDim; dim++) { | ||
centers[clusterID[l]][dim] += data[l][dim]; | ||
} | ||
} | ||
// divides by length | ||
for (var id = 0; id < K; id++) { | ||
for (var d = 0; d < nDim; d++) { | ||
centers[id][d] /= centersLen[id]; | ||
} | ||
// divides by length | ||
for (var id = 0; id < K; id++) { | ||
for (var d = 0; d < nDim; d++) { | ||
centers[id][d] /= centersLen[id]; | ||
} | ||
return centers; | ||
} | ||
return centers; | ||
} | ||
@@ -90,20 +90,15 @@ | ||
* @ignore | ||
* @param {Array<Array<Number>>} centers - the K centers in format [x,y,z,...] | ||
* @param {Array<Array<Number>>} oldCenters - the K old centers in format [x,y,z,...] | ||
* @param {Function} distanceFunction - Distance function to use between the points | ||
* @param {Number} tolerance - Allowed distance for the centroids to move | ||
* @param {Array<Array<number>>} centers - the K centers in format [x,y,z,...] | ||
* @param {Array<Array<number>>} oldCenters - the K old centers in format [x,y,z,...] | ||
* @param {function} distanceFunction - Distance function to use between the points | ||
* @param {number} tolerance - Allowed distance for the centroids to move | ||
* @return {boolean} | ||
*/ | ||
function converged(centers, oldCenters, distanceFunction, tolerance) { | ||
for (var i = 0; i < centers.length; i++) { | ||
if (distanceFunction(centers[i], oldCenters[i]) > tolerance) { | ||
return false; | ||
} | ||
export function hasConverged(centers, oldCenters, distanceFunction, tolerance) { | ||
for (var i = 0; i < centers.length; i++) { | ||
if (distanceFunction(centers[i], oldCenters[i]) > tolerance) { | ||
return false; | ||
} | ||
return true; | ||
} | ||
return true; | ||
} | ||
exports.updateClusterID = updateClusterID; | ||
exports.updateCenters = updateCenters; | ||
exports.calculateDistanceMatrix = calculateDistanceMatrix; | ||
exports.converged = converged; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
31229
10
731
56