New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

@thi.ng/k-means

Package Overview
Dependencies
Maintainers
1
Versions
190
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@thi.ng/k-means - npm Package Compare versions

Comparing version 0.6.48 to 0.6.49

2

CHANGELOG.md
# Change Log
- **Last updated**: 2023-12-09T19:12:03Z
- **Last updated**: 2023-12-11T10:07:09Z
- **Generator**: [thi.ng/monopub](https://thi.ng/monopub)

@@ -5,0 +5,0 @@

@@ -10,184 +10,127 @@ import { argmin } from "@thi.ng/distance/argmin";

import { zeroes } from "@thi.ng/vectors/setn";
/**
* Takes an array of n-dimensional `samples` and attempts to assign them to up
* to `k` clusters (might produce less), using the behavior defined by
* (optionally) given `opts`.
*
* @remarks
* https://en.wikipedia.org/wiki/K-medians_clustering
*
* @param k -
* @param samples -
* @param opts -
*/
export const kmeans = (k, samples, opts) => {
let { dist, initial, maxIter, rnd, strategy } = {
dist: DIST_SQ,
maxIter: 32,
strategy: means,
...opts,
};
const num = samples.length;
const dim = samples[0].length;
const centroidIDs = initial || initKmeanspp(k, samples, dist, rnd);
assert(centroidIDs.length > 0, `missing initial centroids`);
k = centroidIDs.length;
const centroids = centroidIDs.map((i) => samples[i]);
const clusters = new Uint32Array(num).fill(k);
let update = true;
while (update && maxIter-- > 0) {
update = assign(samples, centroids, clusters, dist);
if (!update)
break;
for (let i = 0; i < k; i++) {
const impl = strategy(dim);
for (let j = 0; j < num; j++) {
i === clusters[j] && impl.update(samples[j]);
}
const centroid = impl.finish();
if (centroid)
centroids[i] = centroid;
}
const kmeans = (k, samples, opts) => {
let { dist, initial, maxIter, rnd, strategy } = {
dist: DIST_SQ,
maxIter: 32,
strategy: means,
...opts
};
const num = samples.length;
const dim = samples[0].length;
const centroidIDs = initial || initKmeanspp(k, samples, dist, rnd);
assert(centroidIDs.length > 0, `missing initial centroids`);
k = centroidIDs.length;
const centroids = centroidIDs.map((i) => samples[i]);
const clusters = new Uint32Array(num).fill(k);
let update = true;
while (update && maxIter-- > 0) {
update = assign(samples, centroids, clusters, dist);
if (!update)
break;
for (let i = 0; i < k; i++) {
const impl = strategy(dim);
for (let j = 0; j < num; j++) {
i === clusters[j] && impl.update(samples[j]);
}
const centroid = impl.finish();
if (centroid)
centroids[i] = centroid;
}
return buildClusters(centroids, clusters);
}
return buildClusters(centroids, clusters);
};
/**
* k-means++ initialization / selection of initial cluster centroids. Default
* centroid initialization method for {@link kmeans}.
*
* @remarks
* Might return fewer than `k` centroid IDs if the requested number cannot be
* fulfilled (e.g. due to lower number of samples and/or distance metric).
* Throws an error if `samples` are empty.
*
* @remarks
* References:
* - https://en.wikipedia.org/wiki/K-means%2B%2B
* - http://ilpubs.stanford.edu:8090/778/1/2006-13.pdf
* - http://vldb.org/pvldb/vol5/p622_bahmanbahmani_vldb2012.pdf (TODO)
*
* @param k -
* @param samples -
* @param dist -
* @param rnd -
*/
export const initKmeanspp = (k, samples, dist = DIST_SQ, rnd = SYSTEM) => {
const num = samples.length;
assert(num > 0, `missing samples`);
k = Math.min(k, num);
const centroidIDs = [rnd.int() % num];
const centroids = [samples[centroidIDs[0]]];
const indices = new Array(num).fill(0).map((_, i) => i);
const metric = dist.metric;
while (centroidIDs.length < k) {
let psum = 0;
const probs = samples.map((p) => {
const d = dist.from(metric(p, centroids[argmin(p, centroids, dist)])) **
2;
psum += d;
return d;
});
if (!psum)
break;
let id;
do {
id = weightedRandom(indices, probs, rnd)();
} while (centroidIDs.includes(id));
centroidIDs.push(id);
centroids.push(samples[id]);
}
return centroidIDs;
const initKmeanspp = (k, samples, dist = DIST_SQ, rnd = SYSTEM) => {
const num = samples.length;
assert(num > 0, `missing samples`);
k = Math.min(k, num);
const centroidIDs = [rnd.int() % num];
const centroids = [samples[centroidIDs[0]]];
const indices = new Array(num).fill(0).map((_, i) => i);
const metric = dist.metric;
while (centroidIDs.length < k) {
let psum = 0;
const probs = samples.map((p) => {
const d = dist.from(metric(p, centroids[argmin(p, centroids, dist)])) ** 2;
psum += d;
return d;
});
if (!psum)
break;
let id;
do {
id = weightedRandom(indices, probs, rnd)();
} while (centroidIDs.includes(id));
centroidIDs.push(id);
centroids.push(samples[id]);
}
return centroidIDs;
};
const assign = (samples, centroids, assignments, dist) => {
let update = false;
for (let i = samples.length; i-- > 0;) {
const id = argmin(samples[i], centroids, dist);
if (id !== assignments[i]) {
assignments[i] = id;
update = true;
}
let update = false;
for (let i = samples.length; i-- > 0; ) {
const id = argmin(samples[i], centroids, dist);
if (id !== assignments[i]) {
assignments[i] = id;
update = true;
}
return update;
}
return update;
};
const buildClusters = (centroids, assignments) => {
const clusters = [];
for (let i = 0, n = assignments.length; i < n; i++) {
const id = assignments[i];
(clusters[id] ||
(clusters[id] = {
id,
centroid: centroids[id],
items: [],
})).items.push(i);
}
return clusters.filter((x) => !!x);
const clusters = [];
for (let i = 0, n = assignments.length; i < n; i++) {
const id = assignments[i];
(clusters[id] || (clusters[id] = {
id,
centroid: centroids[id],
items: []
})).items.push(i);
}
return clusters.filter((x) => !!x);
};
/**
* Default centroid strategy forming new centroids by averaging the position of
* participating samples.
*
* @param dim -
*/
export const means = (dim) => {
const acc = zeroes(dim);
let n = 0;
return {
update: (p) => {
add(acc, acc, p);
n++;
},
finish: () => (n ? mulN(acc, acc, 1 / n) : undefined),
};
const means = (dim) => {
const acc = zeroes(dim);
let n = 0;
return {
update: (p) => {
add(acc, acc, p);
n++;
},
finish: () => n ? mulN(acc, acc, 1 / n) : void 0
};
};
/**
* Centroid strategy forming new centroids via componentwise medians.
*
* @remarks
* https://en.wikipedia.org/wiki/K-medians_clustering
*/
export const medians = () => {
const acc = [];
return {
update: (p) => acc.push(p),
finish: () => (acc.length ? median([], acc) : undefined),
};
const medians = () => {
const acc = [];
return {
update: (p) => acc.push(p),
finish: () => acc.length ? median([], acc) : void 0
};
};
/**
* Means centroid strategy for decimal degree lat/lon positions (e.g. WGS84).
* Unlike the default {@link means} strategy, this one treats latitude values
* correctly in terms of the ±180 deg boundary and ensures samples on either
* side of the Pacific are forming correct centroids.
*
* @remarks
* When using this strategy, you should also use the
* [`HAVERSINE_LATLON`](https://docs.thi.ng/umbrella/distance/variables/HAVERSINE_LATLON.html)
* distance metric for {@link KMeansOpts.distance}.
*
* @example
* ```ts
* kmeans(3, [...], { strategy: meansLatLon, dist: HAVERSINE_LATLON })
* ```
*
* https://en.wikipedia.org/wiki/World_Geodetic_System
*/
export const meansLatLon = () => {
let lat = 0;
let lon = 0;
let n = 0;
return {
update: ([$lat, $lon]) => {
lat += $lat < 0 ? $lat + 360 : $lat;
lon += $lon;
n++;
},
finish: () => {
if (!n)
return;
lat /= n;
if (lat > 180)
lat -= 360;
lon /= n;
return [lat, lon];
},
};
const meansLatLon = () => {
let lat = 0;
let lon = 0;
let n = 0;
return {
update: ([$lat, $lon]) => {
lat += $lat < 0 ? $lat + 360 : $lat;
lon += $lon;
n++;
},
finish: () => {
if (!n)
return;
lat /= n;
if (lat > 180)
lat -= 360;
lon /= n;
return [lat, lon];
}
};
};
export {
initKmeanspp,
kmeans,
means,
meansLatLon,
medians
};
{
"name": "@thi.ng/k-means",
"version": "0.6.48",
"version": "0.6.49",
"description": "Configurable k-means & k-medians (with k-means++ initialization) for n-D vectors",

@@ -27,3 +27,5 @@ "type": "module",

"scripts": {
"build": "yarn clean && tsc --declaration",
"build": "yarn build:esbuild && yarn build:decl",
"build:decl": "tsc --declaration --emitDeclarationOnly",
"build:esbuild": "esbuild --format=esm --platform=neutral --target=es2022 --tsconfig=tsconfig.json --outdir=. src/**/*.ts",
"clean": "rimraf --glob '*.js' '*.d.ts' '*.map' doc",

@@ -37,10 +39,11 @@ "doc": "typedoc --excludePrivate --excludeInternal --out doc src/index.ts",

"dependencies": {
"@thi.ng/api": "^8.9.11",
"@thi.ng/distance": "^2.4.33",
"@thi.ng/errors": "^2.4.5",
"@thi.ng/random": "^3.6.17",
"@thi.ng/vectors": "^7.8.8"
"@thi.ng/api": "^8.9.12",
"@thi.ng/distance": "^2.4.34",
"@thi.ng/errors": "^2.4.6",
"@thi.ng/random": "^3.6.18",
"@thi.ng/vectors": "^7.8.9"
},
"devDependencies": {
"@microsoft/api-extractor": "^7.38.3",
"esbuild": "^0.19.8",
"rimraf": "^5.0.5",

@@ -85,3 +88,3 @@ "tools": "^0.0.1",

},
"gitHead": "25f2ac8ff795a432a930119661b364d4d93b59a0\n"
"gitHead": "5e7bafedfc3d53bc131469a28de31dd8e5b4a3ff\n"
}

@@ -52,3 +52,3 @@ <!-- This file is generated - DO NOT EDIT! -->

Package sizes (brotli'd, pre-treeshake): ESM: 878 bytes
Package sizes (brotli'd, pre-treeshake): ESM: 868 bytes

@@ -55,0 +55,0 @@ ## Dependencies

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc