binguru - npm Package Compare versions

Comparing version 1.0.0-alpha.1.0 to 1.0.0-alpha.2.0
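From the fragments below, the change from 1.0.0-alpha.1.0 to 1.0.0-alpha.2.0 appears to wrap the previously top-level CommonJS module in a UMD factory so the compiled bundle also loads under AMD; the BinGuru class body itself looks unchanged. For orientation, a minimal usage sketch consistent with the constructor and return shapes visible in this file (the sample data, and the assumption that the package main resolves to lib/index.js, are illustrative):

const { BinGuru } = require("binguru");
// constructor(rawData = [], binCount = 5, binExtent = 10, precision = 2)
const binner = new BinGuru([1, 5, 7, 30, 50, 52, 53], 3, 10, 2);
const result = binner.equalInterval();
// => { binCount, binBreaks, binSizes, dataRange, dataBinAssignments }
console.log(result.binBreaks);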


lib/index.js

@@ -1,2 +0,1 @@

"use strict";
/***********************************************************

@@ -32,1162 +31,1173 @@ // Filename: index.ts

};
(function (factory) {
if (typeof module === "object" && typeof module.exports === "object") {
var v = factory(require, exports);
if (v !== undefined) module.exports = v;
}
else if (typeof define === "function" && define.amd) {
define(["require", "exports", "simple-statistics", "nerdamer/nerdamer.core.js", "nerdamer/Algebra.js", "nerdamer/Calculus.js", "nerdamer/Solve.js"], factory);
}
})(function (require, exports) {
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.BinGuru = exports.RESILIENCY = exports.MANUAL_INTERVAL = exports.UNIQUE = exports.UNCLASSED = exports.LOGRITHMIC_INTERVAL = exports.GEOMETRIC_INTERVAL = exports.EXPONENTIAL_BIN_SIZE = exports.FISHER_JENKS = exports.HEAD_TAIL_BREAKS = exports.CK_MEANS = exports.PRETTY_BREAKS = exports.MAXIMUM_BREAKS = exports.STANDARD_DEVIATION = exports.BOXPLOT = exports.QUANTILE = exports.DEFINED_INTERVAL = exports.PERCENTILE = exports.EQUAL_INTERVAL = void 0;
const ss = __importStar(require("simple-statistics"));
const nerdamer_core_js_1 = __importDefault(require("nerdamer/nerdamer.core.js"));
require("nerdamer/Algebra.js");
require("nerdamer/Calculus.js");
require("nerdamer/Solve.js");
exports.EQUAL_INTERVAL = "equalInterval";
exports.PERCENTILE = "percentile";
exports.DEFINED_INTERVAL = "definedInterval";
exports.QUANTILE = "quantile";
exports.BOXPLOT = "boxPlot";
exports.STANDARD_DEVIATION = "standardDeviation";
exports.MAXIMUM_BREAKS = "maximumBreaks";
exports.PRETTY_BREAKS = "prettyBreaks";
exports.CK_MEANS = "ckMeans";
exports.HEAD_TAIL_BREAKS = "headTailBreaks";
exports.FISHER_JENKS = "fisherJenks";
exports.EXPONENTIAL_BIN_SIZE = "exponentialBinSizes";
exports.GEOMETRIC_INTERVAL = "geometricInterval";
exports.LOGRITHMIC_INTERVAL = "logarithmicInterval";
exports.UNCLASSED = "unclassed";
exports.UNIQUE = "unique";
exports.MANUAL_INTERVAL = "manualInterval";
exports.RESILIENCY = "resiliency";
class BinGuru {
constructor(rawData = [], binCount = 5, binExtent = 10, precision = 2) {
this.visModel = {};
// Set input params
this.rawData = rawData;
this.binCount = binCount;
this.binExtent = binExtent;
this.precision = precision;
// Process Data
this.data = this.rawData.filter(value => this.isValid(value)); // only work with non NaN, non null, non undefined, numeric data
this.minSortedData = JSON.parse(JSON.stringify(this.data)).sort((n1, n2) => n1 - n2);
this.maxSortedData = JSON.parse(JSON.stringify(this.data)).sort((n1, n2) => n2 - n1);
// Compute Basic Stats
this.median = ss.median(this.data);
this.mean = ss.mean(this.data);
this.sd = ss.standardDeviation(this.data);
this.iqr = ss.interquartileRange(this.data);
this.lq1 = ss.quantile(this.data, 0.01);
this.lq10 = ss.quantile(this.data, 0.10);
this.lq25 = ss.quantile(this.data, 0.25);
this.uq75 = ss.quantile(this.data, 0.75);
this.uq90 = ss.quantile(this.data, 0.90);
this.uq99 = ss.quantile(this.data, 0.99);
[this.min, this.max] = ss.extent(this.data);
this.nonZeroMin = Math.min.apply(null, this.data.filter(Boolean));
// Round off everything to 2 digits.
this.median = parseFloat(this.median.toFixed(2));
this.mean = parseFloat(this.mean.toFixed(2));
this.sd = parseFloat(this.sd.toFixed(2));
this.iqr = parseFloat(this.iqr.toFixed(2));
this.lq1 = parseFloat(this.lq1.toFixed(2));
this.lq10 = parseFloat(this.lq10.toFixed(2));
this.lq25 = parseFloat(this.lq25.toFixed(2));
this.uq75 = parseFloat(this.uq75.toFixed(2));
this.uq90 = parseFloat(this.uq90.toFixed(2));
this.uq99 = parseFloat(this.uq99.toFixed(2));
[this.min, this.max] = [this.min, this.max].map((item) => parseFloat(item.toFixed(2)));
this.nonZeroMin = parseFloat(this.nonZeroMin.toFixed(2));
}
/**
* Return most frequently occurring element in the array.
*/
getMostFrequentElement(array) {
const store = {};
array.forEach((num) => store[num] ? store[num] += 1 : store[num] = 1);
return parseInt(Object.keys(store).sort((a, b) => store[b] - store[a])[0]);
}
/**
* Return frequency of most frequently occurring element in the array.
*/
getFrequencyOfMostFrequentElement(array) {
var mp = new Map();
var n = array.length;
// Traverse through array elements and
// count frequencies
for (var i = 0; i < n; i++) {
if (mp.has(array[i]))
mp.set(array[i], mp.get(array[i]) + 1);
else
mp.set(array[i], 1);
}
var keys = [];
mp.forEach((value, key) => {
keys.push(key);
});
keys.sort((a, b) => a - b);
// Traverse through map and print frequencies
let max = -Infinity;
keys.forEach((key) => {
let val = mp.get(key);
if (val > max) {
max = val;
}
});
return max;
}
/**
* Maximum Breaks
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
maximumBreaks() {
let context = this;
let binBreaks = [];
// compute the differences between adjacent array elements in the sorted version of the data.
let diffs = [];
for (var i = 0; i < context.minSortedData.length - 1; i++) {
const diff = context.minSortedData[i + 1] - context.minSortedData[i];
diffs.push(diff);
}
// note the corresponding indices of the element diffs that is sorted in the descending order by their diff.
var len = diffs.length;
var indices = new Array(len);
for (var i = 0; i < len; ++i)
indices[i] = i;
indices.sort(function (a, b) { return diffs[a] < diffs[b] ? 1 : diffs[a] > diffs[b] ? -1 : 0; }); // descending order
// Next, choose the top `noOfBreaks` (or binCount-1)
// Note: do index + 1 - because `threshold` scale binBreaks has < upper limit and not <= upper limit.
for (let i = 0; i < (context.binCount - 1); i++) {
binBreaks.push(context.minSortedData[indices[i] + 1]);
}
binBreaks = binBreaks.sort(function (a, b) { return a - b; }); // Need to sort it back to ascending order;
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": context.binCount,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
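// Illustrative example (not from the package): for data [1, 2, 3, 10, 11, 12, 30] and
// binCount = 3, the two largest gaps are 12->30 and 3->10, so binBreaks = [10, 30].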
/**
* Head Tail Breaks
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
headTailBreaks() {
let context = this;
let binBreaks = [];
function recursive(data) {
let data_mean = data.reduce(function (a, b) { return a + b; }) / data.length;
let head = data.filter(function (d) { return d > data_mean; });
binBreaks.push(data_mean);
while (head.length > 1 && head.length / data.length < 0.40) {
return recursive(head);
}
;
}
;
recursive(context.maxSortedData);
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
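// Note: because the loop body returns, the `while` above behaves like an `if`; the
// recursion continues only while the head (values above the mean) holds under 40% of
// the data, i.e. for heavy-tailed distributions.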
/**
* ckMeans
* Description: The heuristic k-means algorithm, widely used for cluster analysis, does not guarantee optimality. CKMeans is a dynamic programming algorithm for optimal one-dimensional clustering.
* URL: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5148156/
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
ckMeans() {
let context = this;
let binBreaks = [];
let clusters = ss.ckmeans(context.data, context.binCount);
binBreaks = clusters.map(function (cluster) {
return cluster[cluster.length - 1]; // Last element of each cluster is the bin's upper limit;
});
binBreaks = binBreaks.slice(0, -1); // Delete the last element.
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
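// Note: this delegates to simple-statistics' ckmeans(); each cluster's maximum becomes
// a bin's upper limit, and the final cluster's maximum (the data max) is dropped.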
/**
* Equal Interval
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
equalInterval() {
let context = this;
let binBreaks = [];
let binExtent = (context.max - context.min) / context.binCount;
for (let i = 0; i < (context.binCount - 1); i++) {
let value = context.min + binExtent * (i + 1);
binBreaks.push(value);
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": context.binCount,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
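// Illustrative example (not from the package): min = 0, max = 100, binCount = 4 gives
// binExtent = 25 and binBreaks = [25, 50, 75].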
/**
* Percentile
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
percentile() {
let context = this;
let binBreaks = [
context.lq1 + Number.EPSILON,
context.lq10,
context.median,
context.uq90,
context.uq99
];
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
/**
* Quantile
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
quantile() {
let context = this;
let binBreaks = [];
const indexIncrement = Math.floor(context.minSortedData.length / context.binCount);
for (let i = 1; i < context.binCount; i++) {
let value = context.minSortedData[indexIncrement * i];
binBreaks.push(value);
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
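// Illustrative example (not from the package): 10 sorted values with binCount = 5 give
// indexIncrement = 2 and breaks at sorted indices 2, 4, 6, 8; because of Math.floor,
// the last bin absorbs the remainder when the length is not divisible by binCount.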
/**
* Mean - Standard Deviation
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
standardDeviation() {
let context = this;
let binBreaks = [];
let binCount = context.binCount; // Copy binCount: we modify it below when it is an odd number, but don't want to update the global setting.
// If binCount is even:
let minStart = 0;
let increment = 0;
if (binCount % 2 == 0) {
minStart = context.mean - (context.sd * (binCount / 2 - 1));
increment = context.sd; // 1 standard deviation
}
else {
// minStart = mean - (sd * ((binCount - 1) / 2) / 2);
// increment = sd / 2; // 0.5 standard deviation
binCount++;
minStart = context.mean - (context.sd * (binCount / 2 - 1));
increment = context.sd; // 1 standard deviation
}
for (let i = 0; i < (binCount - 1); i++) {
let value = minStart + increment * i;
binBreaks.push(value);
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
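// Illustrative example (not from the package): mean = 50, sd = 10, binCount = 4 gives
// minStart = 50 - 10 * (4 / 2 - 1) = 40 and binBreaks = [40, 50, 60], centered on the mean.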
/**
* Manual Interval, similar to User Defined
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
manualInterval() {
let context = this;
// let binBreaks = [context.mean - (context.mean - context.min) / 2, context.mean + (context.max - context.mean) / 2];
let binBreaks = [70, 80, 90];
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
/**
* Pretty Breaks
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
prettyBreaks() {
let context = this;
let binBreaks = [];
if (context.binCount == 1) {
binBreaks = [];
}
else if (context.binCount == 2) {
binBreaks = [parseFloat(context.mean.toPrecision(2))];
}
else {
let binExtent = (context.max - context.min) / context.binCount;
for (let i = 0; i < (context.binCount - 1); i++) {
let value = parseFloat((context.min + binExtent * (i + 1)).toPrecision(2));
binBreaks.push(value);
}
}
// return [10, 20, 30, 40]
binBreaks = [...new Set(binBreaks)]; // converting it into a set and then into an array because sometimes during prettification, we may end up with same breaks.
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, 0),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
/**
* Box Plot
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
boxPlot() {
let context = this;
const h = 1.5; // `hinge` for determining outliers (the whiskers). You can change the default of 1.5.
// binCount is fixed in this case: 5 breaks, hence 6 bins.
let binBreaks = [
context.lq25 - h * context.iqr,
context.lq25,
context.median,
context.uq75,
context.uq75 + h * context.iqr
];
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
/**
* Defined Interval
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
definedInterval() {
let context = this;
let binBreaks = [];
let binCount = 1; // running counter; the returned binCount is derived from binBreaks below.
while (context.min + (context.binExtent * binCount) < context.max) {
binBreaks.push(context.min + context.binExtent * binCount);
binCount++;
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
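// Illustrative example (not from the package): min = 0, max = 35, binExtent = 10 gives
// binBreaks = [10, 20, 30] and a derived binCount of 4.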
/**
* Geometric Interval
* Source: `A Python Script for Geometric Interval binning method in QGIS: A Useful Tool for Archaeologists`
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
geometricInterval() {
let context = this;
let binBreaks = [];
// If min is 0, then the multiplier will turn out to be Infinity; hence we then start from the nonZeroMin as the start.
// An alternative could be Number.EPSILON as it is the smallest value above 0 but it seems to be resulting in weird results.
// ToDo: How does a geometric sequence update when both negative and positive values exist; what is the multiplier in those cases?
let seriesStartVal = context.min == 0 ? context.nonZeroMin : context.min;
let multiplier = Math.pow(context.max / seriesStartVal, 1 / context.binCount);
// The formula defines bins' upper limits;
// Hence, we run it only until noOfBreaks = (binCount - 1)
for (let i = 0; i < (context.binCount - 1); i++) {
let value = seriesStartVal * Math.pow(multiplier, i + 1);
binBreaks.push(value);
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
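// Illustrative example (not from the package): min = 1, max = 256, binCount = 4 gives
// multiplier = (256 / 1) ^ (1 / 4) = 4 and binBreaks = [4, 16, 64].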
/**
* Logarithmic Interval
* Intervals grow exponentially, based on a logarithmic scale, to accommodate a wide range of data values and emphasize relative differences at both small and large scales.
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
logarithmicInterval(logBase = 'auto') {
let context = this;
let binBreaks = [];
let binBreak = context.min;
// Calculate the logarithmic base
if (logBase == "auto") {
// Calculate the logarithmic base from the data extent and desired bin count
logBase = Math.pow((context.max / context.min), (1 / context.binCount));
// Generate the bin boundaries using the logarithmic scale
for (let i = 0; i < context.binCount; i++) {
if (i != 0)
binBreaks.push(binBreak);
binBreak *= logBase;
}
}
else {
// Calculate the logarithmic interval size
const logIntervalSize = (Math.log10(context.max) - Math.log10(context.min)) / context.binCount;
for (let i = 0; i < context.binCount; i++) {
if (i != 0)
binBreaks.push(binBreak);
binBreak *= Math.pow(10, logIntervalSize);
}
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
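// Illustrative example (not from the package): min = 1, max = 1000, binCount = 3 with
// logBase = "auto" gives logBase = (1000 / 1) ^ (1 / 3) = 10 and binBreaks = [10, 100].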
/**
* Exponential Bin Size
* Intervals are selected so that the number of observations in each successive interval increases (or decreases) exponentially
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
exponentialBinSizes() {
let context = this;
let binBreaks = [];
const firstBinSize = 1; // Heuristic
const seriesSum = context.minSortedData.length;
const equation = firstBinSize.toString() + ' * (1 - x^' + context.binCount.toString() + ') = ' + seriesSum.toString() + ' * (1 - x)';
const solutions = nerdamer_core_js_1.default.solveEquations(equation, 'x').map((solution) => (0, nerdamer_core_js_1.default)(solution).evaluate().text());
let commonRatio = 1;
for (let i = 0; i < solutions.length; i++) {
try {
let numericSolution = parseFloat(solutions[i]);
if (numericSolution != 1) {
commonRatio = numericSolution;
break;
}
}
catch (err) {
continue;
}
}
// If commonRatio is still 1, then there is no geometric, exponential series.
if (commonRatio == 1) {
return [];
}
else {
let cumulativeSizeBins = 0;
for (let i = 0; i < context.binCount - 1; i++) {
// Size of Nth bin (beginning from firstBinSize and then increasing based on commonRatio)
let nthBinSize = firstBinSize * (Math.pow(commonRatio, i));
// Compute Running Sum of number of items covered.
cumulativeSizeBins += nthBinSize;
// Element Index
const elementIndex = Math.floor(cumulativeSizeBins);
// Bin Break
const binBreak = context.minSortedData[elementIndex - 1]; // -1 as count and index are off by 1.
// Push the value for the binBreak to the binBreaks array.
binBreaks.push(binBreak);
}
}
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
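// Illustrative example (not from the package): with 7 data points and binCount = 3, the
// equation is 1 * (1 - x^3) = 7 * (1 - x), i.e. 1 + x + x^2 = 7; its positive root
// x = 2 yields bin sizes 1, 2, 4 and breaks after the 1st and 3rd sorted values.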
/**
* Fisher Jenks
* URL: http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization
* Implementations: [1](http://danieljlewis.org/files/2010/06/Jenks.pdf) (python),
* [2](https://github.com/vvoovv/djeo-jenks/blob/master/main.js) (buggy),
* [3](https://github.com/simogeo/geostats/blob/master/lib/geostats.js#L407) (works)
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
fisherJenks() {
let context = this;
// Compute the matrices required for Jenks breaks. These matrices
// can be used for any binning of data with `bins <= binCount`
function getMatrices(data, binCount) {
// in the original implementation, these matrices are referred to
// as `LC` and `OP`
//
// * lower_bin_limits (LC): optimal lower bin limits
// * variance_combinations (OP): optimal variance combinations for all bins
var lower_bin_limits = [], variance_combinations = [],
// loop counters
i, j,
// the variance, as computed at each step in the calculation
variance = 0;
// Initialize and fill each matrix with zeroes
for (i = 0; i < data.length + 1; i++) {
var tmp1 = [], tmp2 = [];
for (j = 0; j < binCount + 1; j++) {
tmp1.push(0);
tmp2.push(0);
}
lower_bin_limits.push(tmp1);
variance_combinations.push(tmp2);
}
for (i = 1; i < binCount + 1; i++) {
lower_bin_limits[1][i] = 1;
variance_combinations[1][i] = 0;
// in the original implementation, 9999999 is used but
// since Javascript has `Infinity`, we use that.
for (j = 2; j < data.length + 1; j++) {
variance_combinations[j][i] = Infinity;
}
}
for (var l = 2; l < data.length + 1; l++) {
// `SZ` originally. this is the sum of the values seen thus
// far when calculating variance.
var sum = 0,
// `ZSQ` originally. the sum of squares of values seen
// thus far
sum_squares = 0,
// `WT` originally. This is the number of data points considered so far.
w = 0,
// `IV` originally
i4 = 0;
// in several instances, you could say `Math.pow(x, 2)` instead of `x * x`, but
// this is slower in some browsers and introduces an unnecessary concept.
for (var m = 1; m < l + 1; m++) {
// `III` originally
var lower_bin_limit = l - m + 1, val = data[lower_bin_limit - 1];
// here we're estimating variance for each potential binning
// of the data, for each potential number of bins. `w`
// is the number of data points considered so far.
w++;
// increase the current sum and sum-of-squares
sum += val;
sum_squares += val * val;
// the variance at this point in the sequence is the difference
// between the sum of squares and the total x 2, over the number
// of samples.
variance = sum_squares - (sum * sum) / w;
i4 = lower_bin_limit - 1;
if (i4 !== 0) {
for (j = 2; j < binCount + 1; j++) {
// if adding this element to an existing bin
// will increase its variance beyond the limit, break
// the bin at this point, setting the lower_bin_limit
// at this point.
if (variance_combinations[l][j] >=
(variance + variance_combinations[i4][j - 1])) {
lower_bin_limits[l][j] = lower_bin_limit;
variance_combinations[l][j] = variance +
variance_combinations[i4][j - 1];
}
}
}
}
lower_bin_limits[l][1] = 1;
variance_combinations[l][1] = variance;
}
// return the two matrices. for just providing breaks, only
// `lower_bin_limits` is needed, but variances can be useful to
// evaluate goodness of fit.
return {
lower_bin_limits: lower_bin_limits,
variance_combinations: variance_combinations
};
}
// the second part of the jenks recipe: take the calculated matrices
// and derive an array of n breaks.
function breaks(data, lower_bin_limits, binCount) {
var k = data.length - 1, kbin = [], countNum = binCount;
// the calculation of bins will never include the upper and
// lower bounds, so we need to explicitly set them
kbin[binCount] = data[data.length - 1];
kbin[0] = data[0];
// the lower_bin_limits matrix is used as indexes into itself
// here: the `k` variable is reused in each iteration.
while (countNum > 1) {
kbin[countNum - 1] = data[lower_bin_limits[k][countNum] - 2];
k = lower_bin_limits[k][countNum] - 1;
countNum--;
}
return kbin;
}
if (context.binCount > context.data.length)
return {
"binCount": null,
"binBreaks": [],
"binSizes": { "valids": null, "invalids": null },
"dataRange": [context.min, context.max],
"dataBinAssignments": {}
};
// sort data in numerical order, since this is expected
// by the matrices function
context.data = context.data.slice().sort(function (a, b) { return a - b; });
// get our basic matrices
var matrices = getMatrices(context.data, context.binCount),
// we only need lower bin limits here
lower_bin_limits = matrices.lower_bin_limits;
// extract binCount out of the computed matrices
const allBreaks = breaks(context.data, lower_bin_limits, context.binCount);
let binBreaks = allBreaks.slice(1).slice(0, -1); // this removes the first and last elements of the array because we just need the middle breaks; the `min` and `max` are implicitly inferred anyway.
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
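// Note: the dynamic program above fills (n + 1) x (binCount + 1) matrices for n data
// points, so it needs roughly O(binCount * n^2) time and O(n * binCount) space.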
/**
* Unique
* This method treats each continuous data value as categorical and maps each unique bin of equal values to a distinct color
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
unique() {
let context = this;
const binBreaks = Array.from(new Set(context.minSortedData));
// Compute Bin Sizes
const binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
const dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
/**
* Unclassed
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
unclassed() {
let context = this;
const binBreaks = [context.min, context.max];
// Compute Bin Sizes
const binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
const dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": null,
"binBreaks": [],
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments
};
}
/**
* Resiliency
* // TODO: When you switch to Population Density, the problem is that when all top Priority bins are chosen, there is NO bin in the 3rd or 4th bin; only 1 (3k+ points), 2 (1 point), and 5 (5 points) exist. How do we ensure that at least one point belongs to ALL bins to match the total number of bins requirement (and stay consistent that way). What could we do though? Should we move the boundary points of the neighbouring bins (e.g., end of 2 and beginning of 5) into these missing bins (3rd and 4th)? Maybe. OR when filling the binBreaks, take into consideration the corresponding priorityBin ID (i.e., 1, 2, or 5) and then choose the 3rd and 4th binBreaks by splitting the 2nd and 5th binBreaks.
* @returns { binCount: number, binBreaks: number[], binSizes: object, dataRange: number[], dataBinAssignments: object }
*/
resiliency(binningMethods = []) {
let context = this;
let binBreaks = [];
// Data structure to store the binObj corresponding to each binningMethod.
let binObjs = {};
binningMethods.forEach(function (binningMethod) {
let binObj = {};
switch (binningMethod) {
case exports.EQUAL_INTERVAL:
binObj = context.equalInterval();
binObjs[exports.EQUAL_INTERVAL] = JSON.parse(JSON.stringify(binObj));
break;
case exports.PERCENTILE:
binObj = context.percentile();
binObjs[exports.PERCENTILE] = JSON.parse(JSON.stringify(binObj));
break;
case exports.QUANTILE:
binObj = context.quantile();
binObjs[exports.QUANTILE] = JSON.parse(JSON.stringify(binObj));
break;
case exports.STANDARD_DEVIATION:
binObj = context.standardDeviation();
binObjs[exports.STANDARD_DEVIATION] = JSON.parse(JSON.stringify(binObj));
break;
case exports.MANUAL_INTERVAL:
binObj = context.manualInterval();
binObjs[exports.MANUAL_INTERVAL] = JSON.parse(JSON.stringify(binObj));
break;
case exports.PRETTY_BREAKS:
binObj = context.prettyBreaks();
binObjs[exports.PRETTY_BREAKS] = JSON.parse(JSON.stringify(binObj));
break;
case exports.MAXIMUM_BREAKS:
binObj = context.maximumBreaks();
binObjs[exports.MAXIMUM_BREAKS] = JSON.parse(JSON.stringify(binObj));
break;
case exports.HEAD_TAIL_BREAKS:
binObj = context.headTailBreaks();
binObjs[exports.HEAD_TAIL_BREAKS] = JSON.parse(JSON.stringify(binObj));
break;
case exports.CK_MEANS:
binObj = context.ckMeans();
binObjs[exports.CK_MEANS] = JSON.parse(JSON.stringify(binObj));
break;
case exports.BOXPLOT:
binObj = context.boxPlot();
binObjs[exports.BOXPLOT] = JSON.parse(JSON.stringify(binObj));
break;
case exports.DEFINED_INTERVAL:
binObj = context.definedInterval();
binObjs[exports.DEFINED_INTERVAL] = JSON.parse(JSON.stringify(binObj));
break;
case exports.EXPONENTIAL_BIN_SIZE:
binObj = context.exponentialBinSizes();
binObjs[exports.EXPONENTIAL_BIN_SIZE] = JSON.parse(JSON.stringify(binObj));
break;
case exports.LOGRITHMIC_INTERVAL:
binObj = context.logarithmicInterval();
binObjs[exports.LOGRITHMIC_INTERVAL] = JSON.parse(JSON.stringify(binObj));
break;
case exports.GEOMETRIC_INTERVAL:
binObj = context.geometricInterval();
binObjs[exports.GEOMETRIC_INTERVAL] = JSON.parse(JSON.stringify(binObj));
break;
case exports.FISHER_JENKS:
binObj = context.fisherJenks();
binObjs[exports.FISHER_JENKS] = JSON.parse(JSON.stringify(binObj));
break;
default:
binObj = {
"binCount": null,
"binBreaks": [],
"binSizes": { "valids": null, "invalids": null },
"dataRange": [context.min, context.max],
"dataBinAssignments": {}
};
binObjs["default"] = JSON.parse(JSON.stringify(binObj));
}
});
let frequencyOfMostFrequentBins = {};
let mostFrequentBins = {};
context.rawData.forEach(function (val, valindex) {
// Let the primary key be index of the item in the rawDataArray.
let primaryKey = valindex.toString();
if (context.isValid(val)) {
let binAssignmentsForPrimaryKey = Array.from(Object.values(binObjs)).map((binObj) => binObj["dataBinAssignments"][primaryKey]);
if (!(primaryKey in frequencyOfMostFrequentBins)) {
frequencyOfMostFrequentBins[primaryKey] = 0;
}
frequencyOfMostFrequentBins[primaryKey] = context.getFrequencyOfMostFrequentElement(binAssignmentsForPrimaryKey);
if (!(primaryKey in mostFrequentBins)) {
mostFrequentBins[primaryKey] = 0;
}
mostFrequentBins[primaryKey] = context.getMostFrequentElement(binAssignmentsForPrimaryKey);
}
});
// Compute Data for Resiliency
let resiliencyData = [];
Object.keys(frequencyOfMostFrequentBins).forEach(function (primaryKey, valindex) {
let obj = {};
obj["primaryKey"] = primaryKey;
obj["value"] = context.rawData[valindex];
obj["binCandidates"] = [];
binningMethods.forEach(function (binningMethod) {
obj["binCandidates"].push(JSON.parse(JSON.stringify(binObjs[binningMethod]["dataBinAssignments"][primaryKey])));
});
resiliencyData.push(obj);
});
let itemwiseBinPriorities = {};
let itemwiseBinPriorityWeights = {};
resiliencyData.forEach(function (d) {
itemwiseBinPriorities[d["primaryKey"]] = [];
itemwiseBinPriorityWeights[d["primaryKey"]] = [];
let arr = [...d["binCandidates"]];
while (arr.length > 0) {
const mostFrequentElement = context.getMostFrequentElement(arr);
const frequencyOfMostFrequentElement = context.getFrequencyOfMostFrequentElement(arr);
// Trim the `arr' to now find the next mostFrequentElement and frequencyOfMostFrequentElement.
arr = arr.filter(function (item) { return item !== mostFrequentElement; });
// Add to the priority lists
itemwiseBinPriorities[d["primaryKey"]].push(mostFrequentElement);
itemwiseBinPriorityWeights[d["primaryKey"]].push(frequencyOfMostFrequentElement);
}
});
// Now, iterate through the TOP priority bins for all data items and put them into those bins.
// Then, compute the min-max of these bins OR basically, determine if they are in an AP.
// If they are in an arithmetic progression, well and good.
// If not, there is a need to deprioritize the preferences of the boundary data items and reclassify them to their next best bin priority.
// Keep doing this until there is a solution.
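// Illustrative example (hypothetical data): if the items' top-priority bins
// come out as { 1: [2.1, 3.4], 2: [5.0, 6.2], 3: [8.9] }, then after sorting,
// the first value of each bin from the 2nd onwards becomes a break, giving
// binBreaks = [5.0, 8.9].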
let binInfo = {};
let priorityBins = [];
resiliencyData.forEach(function (d) {
let priorityBin = itemwiseBinPriorities[d["primaryKey"]][0]; // First element is highest priority.
if (!(priorityBin in binInfo)) {
binInfo[priorityBin] = [];
priorityBins.push(priorityBin);
}
binInfo[priorityBin].push(d["value"]);
});
// Sort priorityBins from something like [3, 2, 4, 5, 1] to [1, 2, 3, 4, 5] (No harm in doing this)
priorityBins = priorityBins.sort((n1, n2) => n1 - n2);
// Sort within the priority bins
priorityBins.forEach(function (priorityBin, valindex) {
binInfo[priorityBin] = binInfo[priorityBin].sort((n1, n2) => n1 - n2);
// The first item from the 2nd bin onwards would be the binBreaks.
// TODO: Consideration: Instead of taking the FIRST element of the 2nd bin (or the last element of the 1st), consider taking the AVERAGE of the two. That would guarantee the respective points end up in the appropriate bin, avoiding > vs. >= boundary dilemmas.
if (valindex > 0) {
binBreaks.push(binInfo[priorityBin][0]);
}
});
// New: Round all binBreaks
binBreaks = binBreaks.map((item) => parseFloat(item.toFixed(2)));
binBreaks = binBreaks.sort((n1, n2) => n1 - n2);
// Compute Bin Sizes
let binSizes = context.computeBinSizes(binBreaks);
// Compute Data-> Bin Assignments
let dataBinAssignments = context.computeDataBinAssignments(binBreaks);
// Return final Bin Object
return {
"binCount": binBreaks.length + 1,
"binBreaks": context.roundToPrecision(binBreaks, context.precision),
"binSizes": binSizes,
"dataRange": [context.min, context.max],
"dataBinAssignments": dataBinAssignments,
"binObjs": binObjs,
"mostFrequentBins": mostFrequentBins,
"frequencyOfMostFrequentBins": frequencyOfMostFrequentBins
};
}
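// Usage sketch (illustrative; not part of the compiled source):
//
//   const { BinGuru, EQUAL_INTERVAL, QUANTILE, FISHER_JENKS } = require("binguru");
//   const bg = new BinGuru([4.5, 11, 19.5, 42, 108, 300], 5);
//   const result = bg.resiliency([EQUAL_INTERVAL, QUANTILE, FISHER_JENKS]);
//   console.log(result.binBreaks, result.mostFrequentBins);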
// Compute Bin Size from Bin Breaks
computeBinSizes(binBreaks) {
let context = this;
// Reset Bin Sizes;
let binSizes = {};
let invalids = 0;
// Iterate through all values for the current feature/attribute.
// Where to put NaNs / nulls? For now, just ignore them; we need valindex hence still need to iterate over all.
context.rawData.forEach(function (val, valindex) {
if (context.isValid(val)) {
// We want 1 index, not 0 index.
let binID = 1;
if (!(binID in binSizes)) {
binSizes[binID] = 0;
}
for (let i = binID; i < binBreaks.length + 1; i++) {
if (binBreaks[i - 1] <= val) {
binID = i + 1;
if (!(binID in binSizes)) {
binSizes[binID] = 0;
}
}
}
// Increment the binSizes counter for each binIndex.
binSizes[binID] += 1;
}
else {
invalids++;
}
});
return { "valids": binSizes, "invalids": invalids };
}
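// Example (illustrative): with binBreaks = [10, 20] and rawData = [5, 12, 15, 25, NaN],
// computeBinSizes(binBreaks) returns { "valids": { 1: 1, 2: 2, 3: 1 }, "invalids": 1 },
// since 5 falls below the first break, 12 and 15 land between the breaks,
// 25 lands above the second break, and NaN is counted as invalid.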
// Compute Data -> Bin Assignments from Bin Breaks
computeDataBinAssignments(binBreaks) {
let context = this;
let dataBinAssignments = {};
// Iterate through all values for the current feature/attribute.
// Where to put NaNs / nulls? For now, just ignore them; we need valindex hence still need to iterate over all.
context.rawData.forEach(function (val, valindex) {
// Let the primary key be index of the item in the rawDataArray.
let primaryKey = valindex.toString();
if (context.isValid(val)) {
// We want 1 index, not 0 index.
let binID = 1;
for (let i = binID; i < binBreaks.length + 1; i++) {
if (binBreaks[i - 1] < val) {
binID = i + 1;
}
}
// Assign the binId (indexed at 1) to the primaryKey
dataBinAssignments[primaryKey] = binID;
}
else {
// For invalid values, the binID will be null, by design choice.
dataBinAssignments[primaryKey] = null;
}
});
return dataBinAssignments;
}
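// Example (illustrative): with binBreaks = [10, 20] and rawData = [5, 12, 25, null],
// computeDataBinAssignments(binBreaks) returns { "0": 1, "1": 2, "2": 3, "3": null }.
// Note the boundary subtlety: this method compares with `<` while computeBinSizes
// uses `<=`, so a value exactly equal to a break is assigned to the lower bin here
// but counted in the upper bin there.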
/*
* Return true if the input entity is a valid number and false otherwise.
*/
isValid(val) {
return !Number.isNaN(Number(val)) && val != undefined && val != null && val != "";
}
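// e.g. (illustrative): isValid(3.2) and isValid("3.2") are true; isValid(""),
// isValid(null), isValid(undefined), and isValid("abc") are false.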
/*
* Round array items
*/
roundToPrecision(array, precision = 2) {
return array.map((item) => parseFloat(item.toFixed(precision)));
}
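// e.g. (illustrative): roundToPrecision([1.2345, 2.3456], 2) returns [1.23, 2.35].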
/*
* Create a visualization showing the bin intervals, counts, sizes. Currently using Vega-Lite.
*/
visualize(binBreaks, binningMethodName, colorSchemeCode = "viridis") {
let context = this;
/**
* Important check: `boxPlot` and `standardDeviation` can produce extents that cross dataMin and dataMax.
* Hence, compute [binMin, binMax] explicitly.
*/
let dataMin = context.min;
let dataMax = context.max;
let [binMin, binMax] = [Infinity, -Infinity];
for (var i = 0; i < binBreaks.length; i++) {
let val = binBreaks[i];
if (binMin > val) {
binMin = val;
}
if (binMax < val) {
binMax = val;
}
}
if (binMin > dataMin) {
binMin = dataMin;
}
if (binMax < dataMax) {
binMax = dataMax;
}
let data = [];
let dataTicks = [];
let binSizes = context.computeBinSizes(binBreaks);
let validBinSizes = binSizes["valids"];
let invalidBinSizes = binSizes["invalids"];
for (var i = 0; i <= binBreaks.length; i++) {
let obj = {};
let binID = (i + 1).toString();
if (i == 0) {
obj["binMin"] = binMin;
obj["binMax"] = binBreaks[i];
// Add first binMin
if (!isNaN(obj["binMin"])) {
dataTicks.push(obj["binMin"]);
}
}
else if (i <= binBreaks.length - 1) {
obj["binMin"] = binBreaks[i - 1];
obj["binMax"] = binBreaks[i];
}
else {
obj["binMin"] = binBreaks[i - 1];
obj["binMax"] = binMax;
}
obj["binningMethod"] = binningMethodName;
obj["binID"] = binID.toString();
obj["binSize"] = validBinSizes[binID];
// Add all binEnds
if (!isNaN(obj["binMax"])) {
dataTicks.push(obj["binMax"]);
}
data.push(obj);
}
const specConstants = {
width: 700,
height: 50
};
let vlSpec = {
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": specConstants.width,
"height": specConstants.height,
"background": null,
"config": {
"tick": {
"bandSize": 20
},
"view": { "stroke": null },
"axis": {
"domain": false, "grid": false, "ticks": false
}
},
"layer": [{
"title": null,
"data": {
"values": data
},
"mark": {
"type": "bar",
"tooltip": { "content": "data" }
},
"transform": [{
"filter": "datum.binSize != 0"
}],
"encoding": {
"x": { "field": "binMin", "type": "quantitative", "axis": { "title": null, "values": binBreaks, "format": ".2f", "labelFontSize": 16 }, "scale": { "domain": [binMin, binMax] } },
"y": {
"field": "binningMethod", "type": "ordinal", "axis": {
"title": null,
// "labelFontSize": 16,
// "labelLimit": 250,
// "labelPadding": 10,
"labels": false
}
"layer": [{
"title": null,
"data": {
"values": data
},
"x2": {
"field": "binMax", "scale": { "domain": [binMin, binMax] }, "axis": {
"format": ".2f",
"labelFontSize": 16
}
"mark": {
"type": "bar",
"tooltip": { "content": "data" }
},
"size": {
"field": "binSize",
"legend": null,
// "legend": {
// "titleFontSize": 22,
// "labelFontSize": 18,
// "offset": 36
// },
"scale": {
"type": "linear",
"range": [5, specConstants.height / 2]
"transform": [{
"filter": "datum.binSize != 0"
}],
"encoding": {
"x": { "field": "binMin", "type": "quantitative", "axis": { "title": null, "values": binBreaks, "format": ".2f", "labelFontSize": 16 }, "scale": { "domain": [binMin, binMax] } },
"y": {
"field": "binningMethod", "type": "ordinal", "axis": {
"title": null,
// "labelFontSize": 16,
// "labelLimit": 250,
// "labelPadding": 10,
"labels": false
}
},
"x2": {
"field": "binMax", "scale": { "domain": [binMin, binMax] }, "axis": {
"format": ".2f",
"labelFontSize": 16
}
},
"size": {
"field": "binSize",
"legend": null,
// "legend": {
// "titleFontSize": 22,
// "labelFontSize": 18,
// "offset": 36
// },
"scale": {
"type": "linear",
"range": [5, specConstants.height / 2]
}
},
"color": {
"field": "binID",
"type": "quantitative",
"scale": {
"domain": data.map((obj) => obj["binID"]),
"scheme": colorSchemeCode,
"type": "threshold"
},
"legend": null,
// "legend": {
// "titleFontSize": 22,
// "labelFontSize": 18,
// "offset": 36
// }
}
}
},
{
"title": null,
"data": {
"values": data
},
"color": {
"field": "binID",
"type": "quantitative",
"scale": {
"domain": data.map((obj) => obj["binID"]),
"scheme": colorSchemeCode,
"type": "threshold"
},
"legend": null,
// "legend": {
// "titleFontSize": 22,
// "labelFontSize": 18,
// "offset": 36
// }
"mark": {
"type": "rule",
"tooltip": { "content": "data" }
},
"transform": [{
"filter": "datum.binSize == 0"
}],
"encoding": {
"x": { "field": "binMin", "type": "quantitative", "axis": { "title": null, "values": binBreaks, "format": ".2f" }, "scale": { "domain": [binMin, binMax] } },
"y": { "field": "binningMethod", "type": "ordinal", "axis": { "title": null, "labelFontSize": 16, "labelLimit": 250, "labelPadding": 10 } },
"x2": { "field": "binMax", "scale": { "domain": [binMin, binMax] }, "axis": { "format": ".2f" } },
"size": { "value": 2 },
"strokeDash": { "value": [8, 8] }
}
},
{
"title": null,
"data": {
"values": dataTicks
},
"mark": {
"type": "tick",
"tooltip": { "content": "data" },
"fill": "black",
"orient": "vertical",
"thickness": 3,
"height": specConstants.height / 2
},
"encoding": {
"x": { "field": "data", "type": "quantitative", "scale": { "domain": [binMin, binMax] } }
}
}]
};
return vlSpec;
}
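// Usage sketch (illustrative; assumes the separate vega-embed library):
//
//   const spec = bg.visualize(result.binBreaks, "resiliency", "viridis");
//   vegaEmbed("#vis", spec); // renders the returned Vega-Lite spec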
}
exports.BinGuru = BinGuru;
});
package.json
{
"name": "binguru",
"version": "1.0.0-alpha.1.0",
"description": "BinGuru is a Javascript package with an API to 18+ established data binning / data classification methods, often used for visualizing data on choropleth maps. It also includes an implementation of a new, consensus binning method, 'Resiliency'.",
"version": "1.0.0-alpha.2.0",
"description": "BinGuru is a Javascript package with an API to several established data binning / data classification methods, often used for visualizing data on choropleth maps. It also includes an implementation of a new, consensus binning method, 'Resiliency'.",
"main": "lib/index.js",

@@ -18,3 +18,2 @@ "scripts": {

},
"bundledDependencies": true,
"keywords": [

@@ -21,0 +20,0 @@ "data",

index.ts
@@ -237,3 +237,3 @@ /***********************************************************

let clusters = ss.ckmeans(context.data, context.binCount);
binBreaks = clusters.map(function (cluster) {
binBreaks = clusters.map(function (cluster:number[]) {
return cluster[cluster.length - 1]; // Last element of each cluster is the bin's upper limit;

@@ -240,0 +240,0 @@ });

tsconfig.json
@@ -6,3 +6,4 @@ {

"lib": ["es2019"], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
"module": "commonjs", /* Specify what module code is generated. */
"module": "umd", /* Specify what module code is generated. */
"moduleResolution":"node",
"outDir": "./lib/", /* Specify an output folder for all emitted files. */

@@ -9,0 +10,0 @@ "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
