@thi.ng/k-means
Advanced tools
Comparing version 0.1.0 to 0.2.0
@@ -6,2 +6,13 @@ # Change Log | ||
# [0.2.0](https://github.com/thi-ng/umbrella/compare/@thi.ng/k-means@0.1.0...@thi.ng/k-means@0.2.0) (2021-04-20) | ||
### Features | ||
* **k-means:** add meansLatLon centroid strategy, docstrings ([269c11c](https://github.com/thi-ng/umbrella/commit/269c11c10907351d98acfb929af5036a23a2e5c3)) | ||
# 0.1.0 (2021-04-19) | ||
@@ -8,0 +19,0 @@ |
import { IDistance } from "@thi.ng/distance"; | ||
import { ReadonlyVec } from "@thi.ng/vectors"; | ||
import type { CentroidStrategy, Cluster, KMeansOpts } from "./api"; | ||
/** | ||
* Takes an array of n-dimensional `samples` and attempts to assign them to `k` | ||
* clusters, using the behavior defined by (optionally) given `opts`. | ||
* | ||
* @remarks | ||
* https://en.wikipedia.org/wiki/K-medians_clustering | ||
* | ||
* @param k | ||
* @param samples | ||
* @param opts | ||
* @returns | ||
*/ | ||
export declare const kmeans: <T extends ReadonlyVec>(k: number, samples: T[], opts?: Partial<KMeansOpts> | undefined) => Cluster[]; | ||
/** | ||
* k-means++ initialization / selection of initial cluster centroids. | ||
* k-means++ initialization / selection of initial cluster centroids. Default | ||
* centroid initialization method for {@link kmeans}. | ||
* | ||
@@ -20,4 +33,23 @@ * @remarks | ||
export declare const initKmeanspp: <T extends ReadonlyVec>(k: number, samples: T[], dist?: IDistance<ReadonlyVec>, rnd?: import("@thi.ng/random").SystemRandom) => number[]; | ||
/** | ||
* Default centroid strategy forming new centroids by averaging the position of | ||
* participating samples. | ||
* | ||
* @param dim | ||
*/ | ||
export declare const means: CentroidStrategy; | ||
/** | ||
* Centroid strategy forming new centroids via componentwise medians. | ||
* | ||
* @remarks | ||
* https://en.wikipedia.org/wiki/K-medians_clustering | ||
*/ | ||
export declare const medians: CentroidStrategy; | ||
/** | ||
* Means centroid strategy for WGS84 lat/lon positions. Unlike the default | ||
* {@link means} strategy, this one treats latitude values correctly in terms of | ||
* the ±180 deg boundary and ensures samples on either side of the Pacific are | ||
* forming correct centroids. | ||
*/ | ||
export declare const meansWgs84: CentroidStrategy; | ||
//# sourceMappingURL=kmeans.d.ts.map |
@@ -5,2 +5,14 @@ import { assert } from "@thi.ng/api"; | ||
import { add, median, mulN, zeroes } from "@thi.ng/vectors"; | ||
/** | ||
* Takes an array of n-dimensional `samples` and attempts to assign them to `k` | ||
* clusters, using the behavior defined by (optionally) given `opts`. | ||
* | ||
* @remarks | ||
* https://en.wikipedia.org/wiki/K-medians_clustering | ||
* | ||
* @param k | ||
* @param samples | ||
* @param opts | ||
* @returns | ||
*/ | ||
export const kmeans = (k, samples, opts) => { | ||
@@ -39,3 +51,4 @@ let { dist, initial, maxIter, rnd, strategy } = Object.assign({ dist: DIST_SQ, maxIter: 32, strategy: means }, opts); | ||
/** | ||
* k-means++ initialization / selection of initial cluster centroids. | ||
* k-means++ initialization / selection of initial cluster centroids. Default | ||
* centroid initialization method for {@link kmeans}. | ||
* | ||
@@ -55,8 +68,13 @@ * @remarks | ||
const num = samples.length; | ||
assert(num >= k, `insufficient samples for k=${k}`); | ||
const centroidIDs = [rnd.int() % num]; | ||
const centroids = [samples[centroidIDs[0]]]; | ||
const indices = new Array(num).fill(0).map((_, i) => i); | ||
const metric = dist.metric; | ||
while (centroidIDs.length < k) { | ||
let probs = samples.map((p) => dist.from(dist.metric(p, centroids[argmin(p, centroids)])) ** 2); | ||
const id = weightedRandom(indices, probs)(); | ||
let probs = samples.map((p) => dist.from(metric(p, centroids[argmin(p, centroids)])) ** 2); | ||
let id; | ||
do { | ||
id = weightedRandom(indices, probs)(); | ||
} while (centroidIDs.includes(id)); | ||
centroidIDs.push(id); | ||
@@ -87,2 +105,8 @@ centroids.push(samples[id]); | ||
}; | ||
/** | ||
* Default centroid strategy forming new centroids by averaging the position of | ||
* participating samples. | ||
* | ||
* @param dim | ||
*/ | ||
export const means = (dim) => { | ||
@@ -99,2 +123,8 @@ const acc = zeroes(dim); | ||
}; | ||
/** | ||
* Centroid strategy forming new centroids via componentwise medians. | ||
* | ||
* @remarks | ||
* https://en.wikipedia.org/wiki/K-medians_clustering | ||
*/ | ||
export const medians = () => { | ||
@@ -107,1 +137,28 @@ const acc = []; | ||
}; | ||
/** | ||
* Means centroid strategy for WGS84 lat/lon positions. Unlike the default | ||
* {@link means} strategy, this one treats latitude values correctly in terms of | ||
* the ±180 deg boundary and ensures samples on either side of the Pacific are | ||
* forming correct centroids. | ||
*/ | ||
export const meansWgs84 = () => { | ||
let lat = 0; | ||
let lon = 0; | ||
let n = 0; | ||
return { | ||
update: ([$lat, $lon]) => { | ||
lat += $lat < 0 ? $lat + 360 : $lat; | ||
lon += $lon; | ||
n++; | ||
}, | ||
finish: () => { | ||
if (!n) | ||
return; | ||
lat /= n; | ||
if (lat > 180) | ||
lat -= 360; | ||
lon /= n; | ||
return [lat, lon]; | ||
}, | ||
}; | ||
}; |
@@ -44,8 +44,13 @@ 'use strict'; | ||
const num = samples.length; | ||
api.assert(num >= k, `insufficient samples for k=${k}`); | ||
const centroidIDs = [rnd.int() % num]; | ||
const centroids = [samples[centroidIDs[0]]]; | ||
const indices = new Array(num).fill(0).map((_, i) => i); | ||
const metric = dist.metric; | ||
while (centroidIDs.length < k) { | ||
let probs = samples.map((p) => dist.from(dist.metric(p, centroids[distance.argmin(p, centroids)])) ** 2); | ||
const id = random.weightedRandom(indices, probs)(); | ||
let probs = samples.map((p) => dist.from(metric(p, centroids[distance.argmin(p, centroids)])) ** 2); | ||
let id; | ||
do { | ||
id = random.weightedRandom(indices, probs)(); | ||
} while (centroidIDs.includes(id)); | ||
centroidIDs.push(id); | ||
@@ -94,2 +99,23 @@ centroids.push(samples[id]); | ||
}; | ||
const meansWgs84 = () => { | ||
let lat = 0; | ||
let lon = 0; | ||
let n = 0; | ||
return { | ||
update: ([$lat, $lon]) => { | ||
lat += $lat < 0 ? $lat + 360 : $lat; | ||
lon += $lon; | ||
n++; | ||
}, | ||
finish: () => { | ||
if (!n) | ||
return; | ||
lat /= n; | ||
if (lat > 180) | ||
lat -= 360; | ||
lon /= n; | ||
return [lat, lon]; | ||
}, | ||
}; | ||
}; | ||
@@ -99,2 +125,3 @@ exports.initKmeanspp = initKmeanspp; | ||
exports.means = means; | ||
exports.meansWgs84 = meansWgs84; | ||
exports.medians = medians; |
@@ -1,1 +0,1 @@ | ||
!function(t,e){"object"==typeof exports&&"undefined"!=typeof module?e(exports,require("@thi.ng/api"),require("@thi.ng/distance"),require("@thi.ng/random"),require("@thi.ng/vectors")):"function"==typeof define&&define.amd?define(["exports","@thi.ng/api","@thi.ng/distance","@thi.ng/random","@thi.ng/vectors"],e):e(((t="undefined"!=typeof globalThis?globalThis:t||self).thi=t.thi||{},t.thi.ng=t.thi.ng||{},t.thi.ng.kMeans={}),t.thi.ng.api,t.thi.ng.distance,t.thi.ng.random,t.thi.ng.vectors)}(this,(function(t,e,n,i,r){"use strict";const s=(t,e,r=n.DIST_SQ,s=i.SYSTEM)=>{const o=e.length,a=[s.int()%o],h=[e[a[0]]],d=new Array(o).fill(0).map(((t,e)=>e));for(;a.length<t;){let t=e.map((t=>r.from(r.metric(t,h[n.argmin(t,h)]))**2));const s=i.weightedRandom(d,t)();a.push(s),h.push(e[s])}return a},o=(t,e,i,r)=>{let s=!1;for(let o=0,a=t.length;o<a;o++){const a=n.argmin(t[o],e,r);a!==i[o]&&(i[o]=a,s=!0)}return s},a=(t,e)=>{const n=[];for(let i=0,r=e.length;i<r;i++){const r=e[i];(n[r]||(n[r]={id:r,centroid:t[r],items:[]})).items.push(i)}return n},h=t=>{const e=r.zeroes(t);let n=0;return{update:t=>{r.add(e,e,t),n++},finish:()=>n?r.mulN(e,e,1/n):void 0}};t.initKmeanspp=s,t.kmeans=(t,r,d)=>{let{dist:g,initial:l,maxIter:c,rnd:u,strategy:f}=Object.assign({dist:n.DIST_SQ,maxIter:32,strategy:h},d);const m=r.length,p=r[0].length,y=l||s(t,r,g,u);e.assert(y.length===t,"wrong number of initial centroids");const b=y.map((t=>r[t])),v=[];let S=!0;t:for(;S&&c-- >0;){S=o(r,b,v,g);for(let e=0;e<t;e++){const t=f(p);for(let n=0;n<m;n++)e===v[n]&&t.update(r[n]);const n=t.finish();if(n)b[e]=n;else{const t=y.length;if(i.uniqueIndices(1,m,y,void 0,u),y.length===t)break t;b[e]=r[y[t]],S=!0}}}return a(b,v)},t.means=h,t.medians=()=>{const t=[];return{update:e=>t.push(e),finish:()=>t.length?r.median([],t):void 0}},Object.defineProperty(t,"__esModule",{value:!0})})); | ||
!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports,require("@thi.ng/api"),require("@thi.ng/distance"),require("@thi.ng/random"),require("@thi.ng/vectors")):"function"==typeof define&&define.amd?define(["exports","@thi.ng/api","@thi.ng/distance","@thi.ng/random","@thi.ng/vectors"],t):t(((e="undefined"!=typeof globalThis?globalThis:e||self).thi=e.thi||{},e.thi.ng=e.thi.ng||{},e.thi.ng.kMeans={}),e.thi.ng.api,e.thi.ng.distance,e.thi.ng.random,e.thi.ng.vectors)}(this,(function(e,t,n,i,s){"use strict";const r=(e,s,r=n.DIST_SQ,o=i.SYSTEM)=>{const a=s.length;t.assert(a>=e,`insufficient samples for k=${e}`);const h=[o.int()%a],d=[s[h[0]]],l=new Array(a).fill(0).map(((e,t)=>t)),g=r.metric;for(;h.length<e;){let e,t=s.map((e=>r.from(g(e,d[n.argmin(e,d)]))**2));do{e=i.weightedRandom(l,t)()}while(h.includes(e));h.push(e),d.push(s[e])}return h},o=(e,t,i,s)=>{let r=!1;for(let o=0,a=e.length;o<a;o++){const a=n.argmin(e[o],t,s);a!==i[o]&&(i[o]=a,r=!0)}return r},a=(e,t)=>{const n=[];for(let i=0,s=t.length;i<s;i++){const s=t[i];(n[s]||(n[s]={id:s,centroid:e[s],items:[]})).items.push(i)}return n},h=e=>{const t=s.zeroes(e);let n=0;return{update:e=>{s.add(t,t,e),n++},finish:()=>n?s.mulN(t,t,1/n):void 0}};e.initKmeanspp=r,e.kmeans=(e,s,d)=>{let{dist:l,initial:g,maxIter:u,rnd:f,strategy:c}=Object.assign({dist:n.DIST_SQ,maxIter:32,strategy:h},d);const m=s.length,p=s[0].length,y=g||r(e,s,l,f);t.assert(y.length===e,"wrong number of initial centroids");const b=y.map((e=>s[e])),v=[];let S=!0;e:for(;S&&u-- >0;){S=o(s,b,v,l);for(let t=0;t<e;t++){const e=c(p);for(let n=0;n<m;n++)t===v[n]&&e.update(s[n]);const n=e.finish();if(n)b[t]=n;else{const e=y.length;if(i.uniqueIndices(1,m,y,void 0,f),y.length===e)break e;b[t]=s[y[e]],S=!0}}}return a(b,v)},e.means=h,e.meansWgs84=()=>{let e=0,t=0,n=0;return{update:([i,s])=>{e+=i<0?i+360:i,t+=s,n++},finish:()=>{if(n)return e/=n,e>180&&(e-=360),t/=n,[e,t]}}},e.medians=()=>{const e=[];return{update:t=>e.push(t),finish:()=>e.length?s.median([],e):void 0}},Object.defineProperty(e,"__esModule",{value:!0})})); |
{ | ||
"name": "@thi.ng/k-means", | ||
"version": "0.1.0", | ||
"version": "0.2.0", | ||
"description": "Configurable k-means & k-medians (with k-means++ initialization) for n-D vectors", | ||
@@ -79,3 +79,3 @@ "module": "./index.js", | ||
}, | ||
"gitHead": "a099b0a1f1c6180819df9cc4786faa5c2b535140" | ||
"gitHead": "aa62376d4f1da5e28570659120b3723d3856d964" | ||
} |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
43033
381