Comparing version 0.7.0 to 0.8.0
@@ -30,3 +30,3 @@ "use strict"; | ||
*/ | ||
function eudist(v1, v2) { | ||
function eudist(v1, v2, sqrt) { | ||
var len = v1.length; | ||
@@ -40,23 +40,72 @@ var sum = 0; | ||
// Square root not really needed | ||
return sum; //Math.sqrt(sum); | ||
return sqrt ? Math.sqrt(sum) : sum; | ||
} | ||
/** | ||
* Manhattan distance | ||
* Unidimensional distance | ||
*/ | ||
function mandist(v1, v2) { | ||
var len = v1.length; | ||
var sum = 0; | ||
function dist(v1, v2, sqrt) { | ||
var d = Math.abs(v1 - v2); | ||
return sqrt ? d : d * d; | ||
} | ||
for (var i = 0; i < len; i++) { | ||
sum += Math.abs((v1[i] || 0) - (v2[i] || 0)); | ||
/** | ||
* K-means++ initial centroid selection | ||
*/ | ||
function kmpp(data, k) { | ||
var _this = this; | ||
var dfn = data[0].length ? eudist : dist; | ||
var ks = [], | ||
len = data.length; | ||
// First random centroid | ||
var c = data[Math.floor(Math.random() * len)]; | ||
ks.push(c); | ||
// Retrieve next centroids | ||
var _loop = function _loop() { | ||
// Min Distances | ||
var dists = data.map(function (v) { | ||
// Return the min distance of v to current centroids | ||
var ksd = ks.map(function (c) { | ||
return dfn(v, c); | ||
}); | ||
return Math.min.apply(_this, ksd); | ||
}); | ||
// Distance sum | ||
var dsum = dists.reduce(function (r, v) { | ||
return r + v; | ||
}, 0); | ||
// Probabilities and cummulative prob (cumsum) | ||
var prs = dists.map(function (d, i) { | ||
return { i: i, v: data[i], pr: d / dsum }; | ||
}); | ||
prs.sort(function (a, b) { | ||
return a.pr - b.pr; | ||
}); | ||
prs.forEach(function (p, i) { | ||
p.cs = p.pr + (i > 0 ? prs[i - 1].cs : 0); | ||
}); | ||
// Randomize | ||
var rnd = Math.random(); | ||
// Gets only the items whose cumsum >= rnd | ||
var mprs = prs.filter(function (p) { | ||
return p.cs >= rnd; | ||
}); | ||
// this is our new centroid | ||
ks.push(mprs[0].v); | ||
}; | ||
while (ks.length < k) { | ||
_loop(); | ||
} | ||
return sum; | ||
} | ||
function equals(v1, v2, multi) { | ||
var l = v1.length; | ||
for (var i = 0; i < l; i++) { | ||
if (v1[i] != v2[i]) return false; | ||
}return true; | ||
return ks; | ||
} | ||
@@ -89,2 +138,4 @@ | ||
} | ||
} else if (initial == "kmpp") { | ||
ks = kmpp(data, k); | ||
} else { | ||
@@ -91,0 +142,0 @@ ks = initial; |
@@ -1,3 +0,3 @@ | ||
/*! skmeans 2017-07-14 */ | ||
/*! skmeans 2017-07-17 */ | ||
"use strict";!function r(o,n,e){function f(a,i){if(!n[a]){if(!o[a]){var u="function"==typeof require&&require;if(!i&&u)return u(a,!0);if(t)return t(a,!0);var v=new Error("Cannot find module '"+a+"'");throw v.code="MODULE_NOT_FOUND",v}var s=n[a]={exports:{}};o[a][0].call(s.exports,function(r){var n=o[a][1][r];return f(n||r)},s,s.exports,r,o,n,e)}return n[a].exports}for(var t="function"==typeof require&&require,a=0;a<e.length;a++)f(e[a]);return f}({1:[function(r,o,n){!function(o){var n=r("./main.js");o.skmeans=n}(window)},{"./main.js":2}],2:[function(r,o,n){function e(r,o){for(var n=r.length,e=0,f=0;f<n;f++){var t=(r[f]||0)-(o[f]||0);e+=t*t}return e}function f(r,o,n){n=n||[];for(var e=0;e<r;e++)n[e]=o;return n}var t=1e4;o.exports=function(r,o,n,a){var i=[],u=[],v=[],s=[],c=!1,l=a||t,h=r.length,d=r[0].length,p=d>0;if(n)i=n;else for(var x=0;x<o;x++)i.push(r[Math.floor(Math.random()*h)]);do{for(var m=0;m<h;m++){for(var w=1/0,g=0,k=0;k<o;k++)(s=p?e(r[m],i[k]):Math.abs(r[m]-i[k]))<w&&(w=s,g=k);v[m]=g}for(var q=[],M=[],u=[],b=0;b<o;b++)q[b]=0,M[b]=p?f(d,0,M[b]):0,u[b]=i[b];if(p){for(var O=0;O<o;O++)i[O]=[];for(var j=0;j<h;j++){for(var y=v[j],D=M[y],E=r[j],N=0;N<d;N++)D[N]+=E[N];q[y]++}c=!0;for(var U=0;U<o;U++){for(var _=i[U],C=M[U],F=u[U],L=q[U],T=0;T<d;T++)_[T]=C[T]/L||0;if(c)for(var z=0;z<d;z++)if(F[z]!=_[z]){c=!1;break}}}else{for(var A=0;A<h;A++){var B=v[A];M[B]+=r[A],q[B]++}for(var G=0;G<o;G++)i[G]=M[G]/q[G]||0;c=!0;for(var H=0;H<o;H++)if(u[H]!=i[H]){c=!1;break}}c=c||--l<=0}while(!c);return{it:t-l,k:o,idxs:v,centroids:i}}},{}]},{},[1]); | ||
"use strict";!function r(n,t,o){function a(f,i){if(!t[f]){if(!n[f]){var u="function"==typeof require&&require;if(!i&&u)return u(f,!0);if(e)return e(f,!0);var v=new Error("Cannot find module '"+f+"'");throw v.code="MODULE_NOT_FOUND",v}var c=t[f]={exports:{}};n[f][0].call(c.exports,function(r){var t=n[f][1][r];return a(t||r)},c,c.exports,r,n,t,o)}return t[f].exports}for(var e="function"==typeof require&&require,f=0;f<o.length;f++)a(o[f]);return a}({1:[function(r,n,t){!function(n){var t=r("./main.js");n.skmeans=t}(window)},{"./main.js":2}],2:[function(r,n,t){function o(r,n,t){for(var o=r.length,a=0,e=0;e<o;e++){var f=(r[e]||0)-(n[e]||0);a+=f*f}return t?Math.sqrt(a):a}function a(r,n,t){var o=Math.abs(r-n);return t?o:o*o}function e(r,n){var t=this,e=r[0].length?o:a,f=[],i=r.length,u=r[Math.floor(Math.random()*i)];f.push(u);for(;f.length<n;)!function(){var n=r.map(function(r){var n=f.map(function(n){return e(r,n)});return Math.min.apply(t,n)}),o=n.reduce(function(r,n){return r+n},0),a=n.map(function(n,t){return{i:t,v:r[t],pr:n/o}});a.sort(function(r,n){return r.pr-n.pr}),a.forEach(function(r,n){r.cs=r.pr+(n>0?a[n-1].cs:0)});var i=Math.random(),u=a.filter(function(r){return r.cs>=i});f.push(u[0].v)}();return f}function f(r,n,t){t=t||[];for(var o=0;o<r;o++)t[o]=n;return t}var i=1e4;n.exports=function(r,n,t,a){var u=[],v=[],c=[],s=[],h=!1,p=a||i,l=r.length,m=r[0].length,d=m>0;if(t)u="kmpp"==t?e(r,n):t;else for(var M=0;M<n;M++)u.push(r[Math.floor(Math.random()*l)]);do{for(var g=0;g<l;g++){for(var x=1/0,k=0,q=0;q<n;q++)(s=d?o(r[g],u[q]):Math.abs(r[g]-u[q]))<x&&(x=s,k=q);c[g]=k}for(var w=[],b=[],v=[],y=0;y<n;y++)w[y]=0,b[y]=d?f(m,0,b[y]):0,v[y]=u[y];if(d){for(var E=0;E<n;E++)u[E]=[];for(var O=0;O<l;O++){for(var j=c[O],D=b[j],N=r[O],U=0;U<m;U++)D[U]+=N[U];w[j]++}h=!0;for(var _=0;_<n;_++){for(var C=u[_],F=b[_],L=v[_],T=w[_],z=0;z<m;z++)C[z]=F[z]/T||0;if(h)for(var A=0;A<m;A++)if(L[A]!=C[A]){h=!1;break}}}else{for(var B=0;B<l;B++){var G=c[B];b[G]+=r[B],w[G]++}for(var H=0;H<n;H++)u[H]=b[H]/w[H]||0;h=!0;for(var I=0;I<n;I++)if(v[I]!=u[I]){h=!1;break}}h=h||--p<=0}while(!h);return{it:i-p,k:n,idxs:c,centroids:u}}},{}]},{},[1]); |
62
main.js
@@ -8,3 +8,3 @@ /*jshint esversion: 6 */ | ||
*/ | ||
function eudist(v1,v2) { | ||
function eudist(v1,v2,sqrt) { | ||
var len = v1.length; | ||
@@ -18,23 +18,52 @@ var sum = 0; | ||
// Square root not really needed | ||
return sum; //Math.sqrt(sum); | ||
return sqrt? Math.sqrt(sum) : sum; | ||
} | ||
/** | ||
* Manhattan distance | ||
* Unidimensional distance | ||
*/ | ||
function mandist(v1,v2) { | ||
var len = v1.length; | ||
var sum = 0; | ||
function dist(v1,v2,sqrt) { | ||
var d = Math.abs(v1-v2); | ||
return sqrt? d : d*d; | ||
} | ||
for(let i=0;i<len;i++) { | ||
sum += Math.abs((v1[i]||0) - (v2[i]||0)); | ||
/** | ||
* K-means++ initial centroid selection | ||
*/ | ||
function kmpp(data,k) { | ||
var dfn = data[0].length? eudist : dist; | ||
var ks = [], len = data.length; | ||
// First random centroid | ||
var c = data[Math.floor(Math.random()*len)]; | ||
ks.push(c); | ||
// Retrieve next centroids | ||
while(ks.length<k) { | ||
// Min Distances | ||
let dists = data.map(v=>{ | ||
// Return the min distance of v to current centroids | ||
let ksd = ks.map(c=>dfn(v,c)); | ||
return Math.min.apply(this,ksd); | ||
}); | ||
// Distance sum | ||
let dsum = dists.reduce((r,v)=>r+v,0); | ||
// Probabilities and cummulative prob (cumsum) | ||
let prs = dists.map((d,i)=>{return {i:i,v:data[i],pr:d/dsum}}); | ||
prs.sort((a,b)=>a.pr-b.pr); | ||
prs.forEach((p,i)=>{p.cs = p.pr + (i>0? prs[i-1].cs : 0)}); | ||
// Randomize | ||
let rnd = Math.random(); | ||
// Gets only the items whose cumsum >= rnd | ||
let mprs = prs.filter(p=>p.cs>=rnd); | ||
// this is our new centroid | ||
ks.push(mprs[0].v); | ||
} | ||
return sum; | ||
} | ||
function equals(v1,v2,multi) { | ||
var l = v1.length; | ||
for(var i=0;i<l;i++) | ||
if(v1[i]!=v2[i]) return false; | ||
return true; | ||
return ks; | ||
} | ||
@@ -61,2 +90,5 @@ | ||
} | ||
else if(initial=="kmpp") { | ||
ks = kmpp(data,k); | ||
} | ||
else { | ||
@@ -63,0 +95,0 @@ ks = initial; |
{ | ||
"name": "skmeans", | ||
"version": "0.7.0", | ||
"version": "0.8.0", | ||
"description": "Super fast simple kmeans clustering for unidimiensional and multidimensional data. Works in node and browser", | ||
@@ -5,0 +5,0 @@ "author": "David Gómez Matarrodona <solzimer@gmail.com>", |
@@ -53,3 +53,3 @@ # skmeans | ||
* **k** Number of clusters | ||
* **centroids** Optional. Initial centroid values. If not provided, the algorith will try to choose an apropiate ones. | ||
* **centroids** Optional. Initial centroid values. If not provided, the algorith will try to choose an apropiate ones. You can pass **"kmpp"** as argument, so the algorythm will use the [k-means++](https://en.wikipedia.org/wiki/K-means%2B%2B) cluster initialization method. | ||
* **iterations** Optional. Maximum number of iterations. If not provided, it will be set to 10000. | ||
@@ -56,0 +56,0 @@ |
@@ -13,1 +13,4 @@ const skmeans = require("../main.js"); | ||
console.log(res.it,res.centroids); | ||
var res = skmeans(data,3,"kmpp"); | ||
console.log(res.it,res.centroids); |
@@ -13,1 +13,4 @@ const skmeans = require("../main.js"); | ||
console.log(res.it,res.centroids); | ||
var res = skmeans(data,3,"kmpp"); | ||
console.log(res.it,res.centroids); |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
31085
489