@thi.ng/k-means
Advanced tools
Comparing version 0.2.5 to 0.3.0
@@ -6,2 +6,13 @@ # Change Log | ||
# [0.3.0](https://github.com/thi-ng/umbrella/compare/@thi.ng/k-means@0.2.5...@thi.ng/k-means@0.3.0) (2021-08-04) | ||
### Features | ||
* **k-means:** auto-correct `k` if needed ([d3c3ffa](https://github.com/thi-ng/umbrella/commit/d3c3ffa768bdebe67843c8094af1fe7a9bc524ed)) | ||
## [0.2.5](https://github.com/thi-ng/umbrella/compare/@thi.ng/k-means@0.2.4...@thi.ng/k-means@0.2.5) (2021-08-04) | ||
@@ -8,0 +19,0 @@ |
@@ -5,4 +5,4 @@ import { IDistance } from "@thi.ng/distance"; | ||
/** | ||
* Takes an array of n-dimensional `samples` and attempts to assign them to `k` | ||
* clusters, using the behavior defined by (optionally) given `opts`. | ||
* Takes an array of n-dimensional `samples` and attempts to assign them to up | ||
* to `k` clusters, using the behavior defined by (optionally) given `opts`. | ||
* | ||
@@ -15,3 +15,2 @@ * @remarks | ||
* @param opts | ||
* @returns | ||
*/ | ||
@@ -24,2 +23,7 @@ export declare const kmeans: <T extends ReadonlyVec>(k: number, samples: T[], opts?: Partial<KMeansOpts> | undefined) => Cluster[]; | ||
* @remarks | ||
* Might return fewer than `k` centroid IDs if the requested number cannot be | ||
* fulfilled (e.g. due to lower number of samples and/or distance metric). | ||
* Throws an error if `samples` are empty. | ||
* | ||
* @remarks | ||
* References: | ||
@@ -26,0 +30,0 @@ * - https://en.wikipedia.org/wiki/K-means%2B%2B |
@@ -6,4 +6,4 @@ import { assert } from "@thi.ng/api"; | ||
/** | ||
* Takes an array of n-dimensional `samples` and attempts to assign them to `k` | ||
* clusters, using the behavior defined by (optionally) given `opts`. | ||
* Takes an array of n-dimensional `samples` and attempts to assign them to up | ||
* to `k` clusters, using the behavior defined by (optionally) given `opts`. | ||
* | ||
@@ -16,3 +16,2 @@ * @remarks | ||
* @param opts | ||
* @returns | ||
*/ | ||
@@ -24,3 +23,4 @@ export const kmeans = (k, samples, opts) => { | ||
const centroidIDs = initial || initKmeanspp(k, samples, dist, rnd); | ||
assert(centroidIDs.length === k, `wrong number of initial centroids`); | ||
assert(centroidIDs.length > 0, `missing initial centroids`); | ||
k = centroidIDs.length; | ||
const centroids = centroidIDs.map((i) => samples[i]); | ||
@@ -57,2 +57,7 @@ const clusters = []; | ||
* @remarks | ||
* Might return fewer than `k` centroid IDs if the requested number cannot be | ||
* fulfilled (e.g. due to lower number of samples and/or distance metric). | ||
* Throws an error if `samples` are empty. | ||
* | ||
* @remarks | ||
* References: | ||
@@ -70,3 +75,4 @@ * - https://en.wikipedia.org/wiki/K-means%2B%2B | ||
const num = samples.length; | ||
assert(num >= k, `insufficient samples for k=${k}`); | ||
assert(num > 0, `missing samples`); | ||
k = Math.min(k, num); | ||
const centroidIDs = [rnd.int() % num]; | ||
@@ -77,4 +83,11 @@ const centroids = [samples[centroidIDs[0]]]; | ||
while (centroidIDs.length < k) { | ||
let probs = samples.map((p) => dist.from(metric(p, centroids[argmin(p, centroids, dist)])) ** | ||
2); | ||
let psum = 0; | ||
const probs = samples.map((p) => { | ||
const d = dist.from(metric(p, centroids[argmin(p, centroids, dist)])) ** | ||
2; | ||
psum += d; | ||
return d; | ||
}); | ||
if (!psum) | ||
break; | ||
let id; | ||
@@ -81,0 +94,0 @@ do { |
@@ -15,3 +15,4 @@ 'use strict'; | ||
const centroidIDs = initial || initKmeanspp(k, samples, dist, rnd); | ||
api.assert(centroidIDs.length === k, `wrong number of initial centroids`); | ||
api.assert(centroidIDs.length > 0, `missing initial centroids`); | ||
k = centroidIDs.length; | ||
const centroids = centroidIDs.map((i) => samples[i]); | ||
@@ -45,3 +46,4 @@ const clusters = []; | ||
const num = samples.length; | ||
api.assert(num >= k, `insufficient samples for k=${k}`); | ||
api.assert(num > 0, `missing samples`); | ||
k = Math.min(k, num); | ||
const centroidIDs = [rnd.int() % num]; | ||
@@ -52,4 +54,11 @@ const centroids = [samples[centroidIDs[0]]]; | ||
while (centroidIDs.length < k) { | ||
let probs = samples.map((p) => dist.from(metric(p, centroids[distance.argmin(p, centroids, dist)])) ** | ||
2); | ||
let psum = 0; | ||
const probs = samples.map((p) => { | ||
const d = dist.from(metric(p, centroids[distance.argmin(p, centroids, dist)])) ** | ||
2; | ||
psum += d; | ||
return d; | ||
}); | ||
if (!psum) | ||
break; | ||
let id; | ||
@@ -56,0 +65,0 @@ do { |
@@ -1,1 +0,1 @@ | ||
!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports,require("@thi.ng/api"),require("@thi.ng/distance"),require("@thi.ng/random"),require("@thi.ng/vectors")):"function"==typeof define&&define.amd?define(["exports","@thi.ng/api","@thi.ng/distance","@thi.ng/random","@thi.ng/vectors"],t):t(((e="undefined"!=typeof globalThis?globalThis:e||self).thi=e.thi||{},e.thi.ng=e.thi.ng||{},e.thi.ng.kMeans={}),e.thi.ng.api,e.thi.ng.distance,e.thi.ng.random,e.thi.ng.vectors)}(this,(function(e,t,n,i,s){"use strict";const r=(e,s,r=n.DIST_SQ,o=i.SYSTEM)=>{const a=s.length;t.assert(a>=e,`insufficient samples for k=${e}`);const h=[o.int()%a],d=[s[h[0]]],l=new Array(a).fill(0).map(((e,t)=>t)),u=r.metric;for(;h.length<e;){let e,t=s.map((e=>r.from(u(e,d[n.argmin(e,d,r)]))**2));do{e=i.weightedRandom(l,t,o)()}while(h.includes(e));h.push(e),d.push(s[e])}return h},o=(e,t,i,s)=>{let r=!1;for(let o=0,a=e.length;o<a;o++){const a=n.argmin(e[o],t,s);a!==i[o]&&(i[o]=a,r=!0)}return r},a=(e,t)=>{const n=[];for(let i=0,s=t.length;i<s;i++){const s=t[i];(n[s]||(n[s]={id:s,centroid:e[s],items:[]})).items.push(i)}return n},h=e=>{const t=s.zeroes(e);let n=0;return{update:e=>{s.add(t,t,e),n++},finish:()=>n?s.mulN(t,t,1/n):void 0}};e.initKmeanspp=r,e.kmeans=(e,s,d)=>{let{dist:l,initial:u,maxIter:g,rnd:f,strategy:c}=Object.assign({dist:n.DIST_SQ,maxIter:32,strategy:h},d);const m=s.length,p=s[0].length,y=u||r(e,s,l,f);t.assert(y.length===e,"wrong number of initial centroids");const b=y.map((e=>s[e])),v=[];let S=!0;e:for(;S&&g-- >0;){S=o(s,b,v,l);for(let t=0;t<e;t++){const e=c(p);for(let n=0;n<m;n++)t===v[n]&&e.update(s[n]);const n=e.finish();if(n)b[t]=n;else{const e=y.length;if(i.uniqueIndices(1,m,y,void 0,f),y.length===e)break e;b[t]=s[y[e]],S=!0}}}return a(b,v)},e.means=h,e.meansLatLon=()=>{let e=0,t=0,n=0;return{update:([i,s])=>{e+=i<0?i+360:i,t+=s,n++},finish:()=>{if(n)return e/=n,e>180&&(e-=360),t/=n,[e,t]}}},e.medians=()=>{const 
e=[];return{update:t=>e.push(t),finish:()=>e.length?s.median([],e):void 0}},Object.defineProperty(e,"__esModule",{value:!0})})); | ||
!function(t,e){"object"==typeof exports&&"undefined"!=typeof module?e(exports,require("@thi.ng/api"),require("@thi.ng/distance"),require("@thi.ng/random"),require("@thi.ng/vectors")):"function"==typeof define&&define.amd?define(["exports","@thi.ng/api","@thi.ng/distance","@thi.ng/random","@thi.ng/vectors"],e):e(((t="undefined"!=typeof globalThis?globalThis:t||self).thi=t.thi||{},t.thi.ng=t.thi.ng||{},t.thi.ng.kMeans={}),t.thi.ng.api,t.thi.ng.distance,t.thi.ng.random,t.thi.ng.vectors)}(this,(function(t,e,n,i,s){"use strict";const r=(t,s,r=n.DIST_SQ,o=i.SYSTEM)=>{const a=s.length;e.assert(a>0,"missing samples"),t=Math.min(t,a);const h=[o.int()%a],d=[s[h[0]]],l=new Array(a).fill(0).map(((t,e)=>e)),g=r.metric;for(;h.length<t;){let t=0;const e=s.map((e=>{const i=r.from(g(e,d[n.argmin(e,d,r)]))**2;return t+=i,i}));if(!t)break;let a;do{a=i.weightedRandom(l,e,o)()}while(h.includes(a));h.push(a),d.push(s[a])}return h},o=(t,e,i,s)=>{let r=!1;for(let o=0,a=t.length;o<a;o++){const a=n.argmin(t[o],e,s);a!==i[o]&&(i[o]=a,r=!0)}return r},a=(t,e)=>{const n=[];for(let i=0,s=e.length;i<s;i++){const s=e[i];(n[s]||(n[s]={id:s,centroid:t[s],items:[]})).items.push(i)}return n},h=t=>{const e=s.zeroes(t);let n=0;return{update:t=>{s.add(e,e,t),n++},finish:()=>n?s.mulN(e,e,1/n):void 0}};t.initKmeanspp=r,t.kmeans=(t,s,d)=>{let{dist:l,initial:g,maxIter:u,rnd:c,strategy:f}=Object.assign({dist:n.DIST_SQ,maxIter:32,strategy:h},d);const m=s.length,p=s[0].length,y=g||r(t,s,l,c);e.assert(y.length>0,"missing initial centroids"),t=y.length;const b=y.map((t=>s[t])),v=[];let S=!0;t:for(;S&&u-- >0;){S=o(s,b,v,l);for(let e=0;e<t;e++){const t=f(p);for(let n=0;n<m;n++)e===v[n]&&t.update(s[n]);const n=t.finish();if(n)b[e]=n;else{const t=y.length;if(i.uniqueIndices(1,m,y,void 0,c),y.length===t)break t;b[e]=s[y[t]],S=!0}}}return a(b,v)},t.means=h,t.meansLatLon=()=>{let t=0,e=0,n=0;return{update:([i,s])=>{t+=i<0?i+360:i,e+=s,n++},finish:()=>{if(n)return t/=n,t>180&&(t-=360),e/=n,[t,e]}}},t.medians=()=>{const 
t=[];return{update:e=>t.push(e),finish:()=>t.length?s.median([],t):void 0}},Object.defineProperty(t,"__esModule",{value:!0})})); |
{ | ||
"name": "@thi.ng/k-means", | ||
"version": "0.2.5", | ||
"version": "0.3.0", | ||
"description": "Configurable k-means & k-medians (with k-means++ initialization) for n-D vectors", | ||
@@ -42,5 +42,5 @@ "module": "./index.js", | ||
"@thi.ng/api": "^7.1.7", | ||
"@thi.ng/distance": "^0.3.5", | ||
"@thi.ng/random": "^2.4.3", | ||
"@thi.ng/vectors": "^6.0.4" | ||
"@thi.ng/distance": "^0.3.6", | ||
"@thi.ng/random": "^2.4.4", | ||
"@thi.ng/vectors": "^6.0.5" | ||
}, | ||
@@ -69,3 +69,3 @@ "files": [ | ||
}, | ||
"gitHead": "a85a4dd69e086a18d97d22204667c6b5334b01e0" | ||
"gitHead": "5a289330f80e3c253c3b434655825c5dcfaebfd2" | ||
} |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
47321
433
Updated @thi.ng/distance@^0.3.6
Updated @thi.ng/random@^2.4.4
Updated @thi.ng/vectors@^6.0.5