Comparing version 0.0.6 to 0.0.7
460
index.js
@@ -1,93 +0,1 @@ | ||
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { | ||
try { | ||
var info = gen[key](arg); | ||
var value = info.value; | ||
} catch (error) { | ||
reject(error); | ||
return; | ||
} | ||
if (info.done) { | ||
resolve(value); | ||
} else { | ||
Promise.resolve(value).then(_next, _throw); | ||
} | ||
} | ||
function _asyncToGenerator(fn) { | ||
return function () { | ||
var self = this, | ||
args = arguments; | ||
return new Promise(function (resolve, reject) { | ||
var gen = fn.apply(self, args); | ||
function _next(value) { | ||
asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); | ||
} | ||
function _throw(err) { | ||
asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); | ||
} | ||
_next(undefined); | ||
}); | ||
}; | ||
} | ||
function _classCallCheck(instance, Constructor) { | ||
if (!(instance instanceof Constructor)) { | ||
throw new TypeError("Cannot call a class as a function"); | ||
} | ||
} | ||
function _defineProperties(target, props) { | ||
for (var i = 0; i < props.length; i++) { | ||
var descriptor = props[i]; | ||
descriptor.enumerable = descriptor.enumerable || false; | ||
descriptor.configurable = true; | ||
if ("value" in descriptor) descriptor.writable = true; | ||
Object.defineProperty(target, descriptor.key, descriptor); | ||
} | ||
} | ||
function _createClass(Constructor, protoProps, staticProps) { | ||
if (protoProps) _defineProperties(Constructor.prototype, protoProps); | ||
if (staticProps) _defineProperties(Constructor, staticProps); | ||
return Constructor; | ||
} | ||
function _defineProperty(obj, key, value) { | ||
if (key in obj) { | ||
Object.defineProperty(obj, key, { | ||
value: value, | ||
enumerable: true, | ||
configurable: true, | ||
writable: true | ||
}); | ||
} else { | ||
obj[key] = value; | ||
} | ||
return obj; | ||
} | ||
function _objectSpread(target) { | ||
for (var i = 1; i < arguments.length; i++) { | ||
var source = arguments[i] != null ? arguments[i] : {}; | ||
var ownKeys = Object.keys(source); | ||
if (typeof Object.getOwnPropertySymbols === 'function') { | ||
ownKeys = ownKeys.concat(Object.getOwnPropertySymbols(source).filter(function (sym) { | ||
return Object.getOwnPropertyDescriptor(source, sym).enumerable; | ||
})); | ||
} | ||
ownKeys.forEach(function (key) { | ||
_defineProperty(target, key, source[key]); | ||
}); | ||
} | ||
return target; | ||
} | ||
/** | ||
@@ -103,13 +11,7 @@ * Cede control to the event loop for one tick, from within an async function. | ||
function tick() { | ||
return new Promise(function (resolve) { | ||
return setTimeout(resolve, 0); | ||
}); | ||
return new Promise(resolve => setTimeout(resolve, 0)); | ||
} | ||
var KeyClusterer = | ||
/*#__PURE__*/ | ||
function () { | ||
function KeyClusterer(bucket, keyer, options) { | ||
_classCallCheck(this, KeyClusterer); | ||
class KeyClusterer { | ||
constructor(bucket, keyer, options) { | ||
this.bucket = bucket; | ||
@@ -122,123 +24,73 @@ this.keyer = keyer; | ||
_createClass(KeyClusterer, [{ | ||
key: "cancel", | ||
value: function cancel() { | ||
this.canceled = true; | ||
} | ||
}, { | ||
key: "cluster", | ||
value: function () { | ||
var _cluster = _asyncToGenerator( | ||
/*#__PURE__*/ | ||
regeneratorRuntime.mark(function _callee() { | ||
var bucket, keyer, _this$options, tickMs, nIterationsBetweenTickChecks, bins, keyToBin, i, t1, strs, _i, str, t2, count, key, bin, maxCount; | ||
cancel() { | ||
this.canceled = true; | ||
} | ||
return regeneratorRuntime.wrap(function _callee$(_context) { | ||
while (1) { | ||
switch (_context.prev = _context.next) { | ||
case 0: | ||
bucket = this.bucket, keyer = this.keyer; | ||
_this$options = this.options, tickMs = _this$options.tickMs, nIterationsBetweenTickChecks = _this$options.nIterationsBetweenTickChecks; | ||
bins = []; | ||
keyToBin = {}; | ||
i = 0; | ||
t1 = new Date(); | ||
strs = Object.keys(bucket); | ||
_i = 0; | ||
async cluster() { | ||
const bucket = this.bucket, | ||
keyer = this.keyer; | ||
const _this$options = this.options, | ||
tickMs = _this$options.tickMs, | ||
nIterationsBetweenTickChecks = _this$options.nIterationsBetweenTickChecks; | ||
const bins = []; | ||
const keyToBin = {}; | ||
let t1 = new Date(); | ||
const strs = Object.keys(bucket); | ||
case 8: | ||
if (!(_i < strs.length)) { | ||
_context.next = 29; | ||
break; | ||
} | ||
for (let i = 0; i < strs.length; i++) { | ||
const str = strs[i]; | ||
str = strs[_i]; | ||
i += 1; | ||
if ((i + 1 & nIterationsBetweenTickChecks) === 0) { | ||
const t2 = new Date(); | ||
if (!((i & nIterationsBetweenTickChecks) === 0)) { | ||
_context.next = 20; | ||
break; | ||
} | ||
if (t2 - t1 >= tickMs) { | ||
this.progress = i / strs.length; | ||
await tick(); // We can only be canceled while we aren't executing. So now that | ||
// we're back from our tick is the only time we need to check. | ||
t2 = new Date(); | ||
if (this.canceled) { | ||
throw new Error('canceled'); | ||
} | ||
if (!(t2 - t1 >= tickMs)) { | ||
_context.next = 20; | ||
break; | ||
} | ||
t1 = new Date(); | ||
} | ||
} | ||
this.progress = (i - 1) / strs.length; | ||
_context.next = 17; | ||
return tick(); | ||
const count = bucket[str]; | ||
const key = keyer(str); | ||
let bin = keyToBin[key]; | ||
case 17: | ||
if (!this.canceled) { | ||
_context.next = 19; | ||
break; | ||
} | ||
if (!bin) { | ||
bin = { | ||
key: key, | ||
name: str, | ||
count: 0, | ||
bucket: {} | ||
}; | ||
keyToBin[key] = bin; | ||
bins.push(bin); | ||
} else { | ||
// Maybe change name. We do it in this loop so we're O(n) | ||
const maxCount = bin.bucket[bin.name]; | ||
throw new Error('canceled'); | ||
if (count > maxCount || count === maxCount && str.localeCompare(bin.name) < 0) { | ||
bin.name = str; | ||
} | ||
} | ||
case 19: | ||
t1 = new Date(); | ||
bin.count += count; | ||
bin.bucket[str] = count; | ||
} | ||
case 20: | ||
count = bucket[str]; | ||
key = keyer(str); | ||
bin = keyToBin[key]; | ||
this.progress = 1; | ||
return bins.filter(b => Object.keys(b.bucket).length > 1); | ||
} | ||
if (!bin) { | ||
bin = { | ||
key: key, | ||
name: str, | ||
count: 0, | ||
bucket: {} | ||
}; | ||
keyToBin[key] = bin; | ||
bins.push(bin); | ||
} else { | ||
// Maybe change name. We do it in this loop so we're O(n) | ||
maxCount = bin.bucket[bin.name]; | ||
} | ||
if (count > maxCount || count === maxCount && str.localeCompare(bin.name) < 0) { | ||
bin.name = str; | ||
} | ||
} | ||
function clusterByKey(bucket, keyer, options = {}) { | ||
options = Object.assign({ | ||
tickMs: 8, | ||
nIterationsBetweenTickChecks: 0xfff // must be power of two, minus one | ||
bin.count += count; | ||
bin.bucket[str] = count; | ||
case 26: | ||
_i++; | ||
_context.next = 8; | ||
break; | ||
case 29: | ||
this.progress = 1; | ||
return _context.abrupt("return", bins.filter(function (b) { | ||
return Object.keys(b.bucket).length > 1; | ||
})); | ||
case 31: | ||
case "end": | ||
return _context.stop(); | ||
} | ||
} | ||
}, _callee, this); | ||
})); | ||
return function cluster() { | ||
return _cluster.apply(this, arguments); | ||
}; | ||
}() | ||
}]); | ||
return KeyClusterer; | ||
}(); | ||
function clusterByKey(bucket, keyer) { | ||
var options = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {}; | ||
options = _objectSpread({ | ||
tickMs: 8, | ||
nIterationsBetweenTickChecks: 0xfff | ||
}, options); | ||
@@ -248,8 +100,4 @@ return new KeyClusterer(bucket, keyer, options); | ||
var KnnClusterer = | ||
/*#__PURE__*/ | ||
function () { | ||
function KnnClusterer(bucket, distance, radius, options) { | ||
_classCallCheck(this, KnnClusterer); | ||
class KnnClusterer { | ||
constructor(bucket, distance, radius, options) { | ||
this.bucket = bucket; | ||
@@ -263,137 +111,83 @@ this.distance = distance; | ||
_createClass(KnnClusterer, [{ | ||
key: "cancel", | ||
value: function cancel() { | ||
this.canceled = true; | ||
} | ||
}, { | ||
key: "cluster", | ||
value: function () { | ||
var _cluster = _asyncToGenerator( | ||
/*#__PURE__*/ | ||
regeneratorRuntime.mark(function _callee() { | ||
var bucket, distance, radius, _this$options, tickMs, nIterationsBetweenTickChecks, strs, nStrs, bins, t1, i, nComparisons, ai, a, aCount, bin, bi, t2, b, d, maxCount, bCount; | ||
cancel() { | ||
this.canceled = true; | ||
} | ||
return regeneratorRuntime.wrap(function _callee$(_context) { | ||
while (1) { | ||
switch (_context.prev = _context.next) { | ||
case 0: | ||
bucket = this.bucket, distance = this.distance, radius = this.radius; | ||
_this$options = this.options, tickMs = _this$options.tickMs, nIterationsBetweenTickChecks = _this$options.nIterationsBetweenTickChecks; | ||
strs = Object.keys(bucket); | ||
nStrs = strs.length; | ||
bins = []; | ||
t1 = new Date(); | ||
i = 0; | ||
nComparisons = Math.max(0, nStrs * (nStrs - 1)); | ||
ai = 0; | ||
async cluster() { | ||
const bucket = this.bucket, | ||
distance = this.distance, | ||
radius = this.radius; | ||
const _this$options = this.options, | ||
tickMs = _this$options.tickMs, | ||
nIterationsBetweenTickChecks = _this$options.nIterationsBetweenTickChecks; | ||
const strs = Object.keys(bucket); | ||
const nStrs = strs.length; | ||
const bins = []; | ||
let t1 = new Date(); | ||
let i = 0; | ||
const nComparisons = Math.max(0, nStrs * (nStrs - 1)); | ||
case 9: | ||
if (!(ai < nStrs)) { | ||
_context.next = 34; | ||
break; | ||
} | ||
for (let ai = 0; ai < nStrs; ai++) { | ||
const a = strs[ai]; | ||
const aCount = bucket[a]; | ||
let bin = null; // set iff any b clusters with a | ||
a = strs[ai]; | ||
aCount = bucket[a]; | ||
bin = null; // set iff any b clusters with a | ||
for (let bi = ai + 1; bi < nStrs; bi++) { | ||
i += 1; | ||
bi = ai + 1; | ||
if ((i & nIterationsBetweenTickChecks) === 0) { | ||
const t2 = new Date(); | ||
case 14: | ||
if (!(bi < nStrs)) { | ||
_context.next = 31; | ||
break; | ||
} | ||
if (t2 - t1 >= tickMs) { | ||
this.progress = (i - 1) / nComparisons; | ||
await tick(); // We can only be canceled while we aren't executing. So now that | ||
// we're back from our tick is the only time we need to check. | ||
i += 1; | ||
if (this.canceled) { | ||
throw new Error('canceled'); | ||
} | ||
if (!((i & nIterationsBetweenTickChecks) === 0)) { | ||
_context.next = 25; | ||
break; | ||
} | ||
t1 = new Date(); | ||
} | ||
} | ||
t2 = new Date(); | ||
const b = strs[bi]; | ||
const d = distance(a, b); | ||
if (!(t2 - t1 >= tickMs)) { | ||
_context.next = 25; | ||
break; | ||
} | ||
if (d <= radius) { | ||
if (!bin) { | ||
bin = { | ||
name: a, | ||
count: aCount, | ||
bucket: { | ||
[a]: aCount | ||
} | ||
}; | ||
bins.push(bin); | ||
} | ||
this.progress = (i - 1) / nComparisons; | ||
_context.next = 22; | ||
return tick(); | ||
const maxCount = bin.bucket[bin.name]; | ||
const bCount = bucket[b]; | ||
case 22: | ||
if (!this.canceled) { | ||
_context.next = 24; | ||
break; | ||
} | ||
if (bCount > maxCount || bCount === maxCount && b.localeCompare(bin.name) < 0) { | ||
bin.name = b; | ||
} | ||
throw new Error('canceled'); | ||
bin.count += bCount; | ||
bin.bucket[b] = bCount; | ||
} | ||
} | ||
} | ||
case 24: | ||
t1 = new Date(); | ||
this.progress = 1; | ||
return bins; | ||
} | ||
case 25: | ||
b = strs[bi]; | ||
d = distance(a, b); | ||
} | ||
if (d <= radius) { | ||
if (!bin) { | ||
bin = { | ||
name: a, | ||
count: aCount, | ||
bucket: _defineProperty({}, a, aCount) | ||
}; | ||
bins.push(bin); | ||
} | ||
function clusterByKnn(bucket, distance, radius, options = {}) { | ||
options = Object.assign({ | ||
tickMs: 8, | ||
nIterationsBetweenTickChecks: 0xfff // must be power of two, minus one | ||
maxCount = bin.bucket[bin.name]; | ||
bCount = bucket[b]; | ||
if (bCount > maxCount || bCount === maxCount && b.localeCompare(bin.name) < 0) { | ||
bin.name = b; | ||
} | ||
bin.count += bCount; | ||
bin.bucket[b] = bCount; | ||
} | ||
case 28: | ||
bi++; | ||
_context.next = 14; | ||
break; | ||
case 31: | ||
ai++; | ||
_context.next = 9; | ||
break; | ||
case 34: | ||
this.progress = 1; | ||
return _context.abrupt("return", bins); | ||
case 36: | ||
case "end": | ||
return _context.stop(); | ||
} | ||
} | ||
}, _callee, this); | ||
})); | ||
return function cluster() { | ||
return _cluster.apply(this, arguments); | ||
}; | ||
}() | ||
}]); | ||
return KnnClusterer; | ||
}(); | ||
function clusterByKnn(bucket, distance, radius) { | ||
var options = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : {}; | ||
options = _objectSpread({ | ||
tickMs: 8, | ||
nIterationsBetweenTickChecks: 0xfff | ||
}, options); | ||
@@ -400,0 +194,0 @@ return new KnnClusterer(bucket, distance, radius, options); |
function stripLatin1Accent(c) { | ||
switch (c) { | ||
case "\xC0": | ||
case "\xC1": | ||
case "\xC2": | ||
case "\xC3": | ||
case "\xC4": | ||
case "\xC5": | ||
case "\xE0": | ||
case "\xE1": | ||
case "\xE2": | ||
case "\xE3": | ||
case "\xE4": | ||
case "\xE5": | ||
case "\u0100": | ||
case "\u0101": | ||
case "\u0102": | ||
case "\u0103": | ||
case "\u0104": | ||
case "\u0105": | ||
case '\u00C0': | ||
case '\u00C1': | ||
case '\u00C2': | ||
case '\u00C3': | ||
case '\u00C4': | ||
case '\u00C5': | ||
case '\u00E0': | ||
case '\u00E1': | ||
case '\u00E2': | ||
case '\u00E3': | ||
case '\u00E4': | ||
case '\u00E5': | ||
case '\u0100': | ||
case '\u0101': | ||
case '\u0102': | ||
case '\u0103': | ||
case '\u0104': | ||
case '\u0105': | ||
return 'a'; | ||
case "\xC7": | ||
case "\xE7": | ||
case "\u0106": | ||
case "\u0107": | ||
case "\u0108": | ||
case "\u0109": | ||
case "\u010A": | ||
case "\u010B": | ||
case "\u010C": | ||
case "\u010D": | ||
case '\u00C7': | ||
case '\u00E7': | ||
case '\u0106': | ||
case '\u0107': | ||
case '\u0108': | ||
case '\u0109': | ||
case '\u010A': | ||
case '\u010B': | ||
case '\u010C': | ||
case '\u010D': | ||
return 'c'; | ||
case "\xD0": | ||
case "\xF0": | ||
case "\u010E": | ||
case "\u010F": | ||
case "\u0110": | ||
case "\u0111": | ||
case '\u00D0': | ||
case '\u00F0': | ||
case '\u010E': | ||
case '\u010F': | ||
case '\u0110': | ||
case '\u0111': | ||
return 'd'; | ||
case "\xC8": | ||
case "\xC9": | ||
case "\xCA": | ||
case "\xCB": | ||
case "\xE8": | ||
case "\xE9": | ||
case "\xEA": | ||
case "\xEB": | ||
case "\u0112": | ||
case "\u0113": | ||
case "\u0114": | ||
case "\u0115": | ||
case "\u0116": | ||
case "\u0117": | ||
case "\u0118": | ||
case "\u0119": | ||
case "\u011A": | ||
case "\u011B": | ||
case '\u00C8': | ||
case '\u00C9': | ||
case '\u00CA': | ||
case '\u00CB': | ||
case '\u00E8': | ||
case '\u00E9': | ||
case '\u00EA': | ||
case '\u00EB': | ||
case '\u0112': | ||
case '\u0113': | ||
case '\u0114': | ||
case '\u0115': | ||
case '\u0116': | ||
case '\u0117': | ||
case '\u0118': | ||
case '\u0119': | ||
case '\u011A': | ||
case '\u011B': | ||
return 'e'; | ||
case "\u011C": | ||
case "\u011D": | ||
case "\u011E": | ||
case "\u011F": | ||
case "\u0120": | ||
case "\u0121": | ||
case "\u0122": | ||
case "\u0123": | ||
case '\u011C': | ||
case '\u011D': | ||
case '\u011E': | ||
case '\u011F': | ||
case '\u0120': | ||
case '\u0121': | ||
case '\u0122': | ||
case '\u0123': | ||
return 'g'; | ||
case "\u0124": | ||
case "\u0125": | ||
case "\u0126": | ||
case "\u0127": | ||
case '\u0124': | ||
case '\u0125': | ||
case '\u0126': | ||
case '\u0127': | ||
return 'h'; | ||
case "\xCC": | ||
case "\xCD": | ||
case "\xCE": | ||
case "\xCF": | ||
case "\xEC": | ||
case "\xED": | ||
case "\xEE": | ||
case "\xEF": | ||
case "\u0128": | ||
case "\u0129": | ||
case "\u012A": | ||
case "\u012B": | ||
case "\u012C": | ||
case "\u012D": | ||
case "\u012E": | ||
case "\u012F": | ||
case "\u0130": | ||
case "\u0131": | ||
case '\u00CC': | ||
case '\u00CD': | ||
case '\u00CE': | ||
case '\u00CF': | ||
case '\u00EC': | ||
case '\u00ED': | ||
case '\u00EE': | ||
case '\u00EF': | ||
case '\u0128': | ||
case '\u0129': | ||
case '\u012A': | ||
case '\u012B': | ||
case '\u012C': | ||
case '\u012D': | ||
case '\u012E': | ||
case '\u012F': | ||
case '\u0130': | ||
case '\u0131': | ||
return 'i'; | ||
case "\u0134": | ||
case "\u0135": | ||
case '\u0134': | ||
case '\u0135': | ||
return 'j'; | ||
case "\u0136": | ||
case "\u0137": | ||
case "\u0138": | ||
case '\u0136': | ||
case '\u0137': | ||
case '\u0138': | ||
return 'k'; | ||
case "\u0139": | ||
case "\u013A": | ||
case "\u013B": | ||
case "\u013C": | ||
case "\u013D": | ||
case "\u013E": | ||
case "\u013F": | ||
case "\u0140": | ||
case "\u0141": | ||
case "\u0142": | ||
case '\u0139': | ||
case '\u013A': | ||
case '\u013B': | ||
case '\u013C': | ||
case '\u013D': | ||
case '\u013E': | ||
case '\u013F': | ||
case '\u0140': | ||
case '\u0141': | ||
case '\u0142': | ||
return 'l'; | ||
case "\xD1": | ||
case "\xF1": | ||
case "\u0143": | ||
case "\u0144": | ||
case "\u0145": | ||
case "\u0146": | ||
case "\u0147": | ||
case "\u0148": | ||
case "\u0149": | ||
case "\u014A": | ||
case "\u014B": | ||
case '\u00D1': | ||
case '\u00F1': | ||
case '\u0143': | ||
case '\u0144': | ||
case '\u0145': | ||
case '\u0146': | ||
case '\u0147': | ||
case '\u0148': | ||
case '\u0149': | ||
case '\u014A': | ||
case '\u014B': | ||
return 'n'; | ||
case "\xD2": | ||
case "\xD3": | ||
case "\xD4": | ||
case "\xD5": | ||
case "\xD6": | ||
case "\xD8": | ||
case "\xF2": | ||
case "\xF3": | ||
case "\xF4": | ||
case "\xF5": | ||
case "\xF6": | ||
case "\xF8": | ||
case "\u014C": | ||
case "\u014D": | ||
case "\u014E": | ||
case "\u014F": | ||
case "\u0150": | ||
case "\u0151": | ||
case '\u00D2': | ||
case '\u00D3': | ||
case '\u00D4': | ||
case '\u00D5': | ||
case '\u00D6': | ||
case '\u00D8': | ||
case '\u00F2': | ||
case '\u00F3': | ||
case '\u00F4': | ||
case '\u00F5': | ||
case '\u00F6': | ||
case '\u00F8': | ||
case '\u014C': | ||
case '\u014D': | ||
case '\u014E': | ||
case '\u014F': | ||
case '\u0150': | ||
case '\u0151': | ||
return 'o'; | ||
case "\u0154": | ||
case "\u0155": | ||
case "\u0156": | ||
case "\u0157": | ||
case "\u0158": | ||
case "\u0159": | ||
case '\u0154': | ||
case '\u0155': | ||
case '\u0156': | ||
case '\u0157': | ||
case '\u0158': | ||
case '\u0159': | ||
return 'r'; | ||
case "\u015A": | ||
case "\u015B": | ||
case "\u015C": | ||
case "\u015D": | ||
case "\u015E": | ||
case "\u015F": | ||
case "\u0160": | ||
case "\u0161": | ||
case "\u017F": | ||
case '\u015A': | ||
case '\u015B': | ||
case '\u015C': | ||
case '\u015D': | ||
case '\u015E': | ||
case '\u015F': | ||
case '\u0160': | ||
case '\u0161': | ||
case '\u017F': | ||
return 's'; | ||
case "\u0162": | ||
case "\u0163": | ||
case "\u0164": | ||
case "\u0165": | ||
case "\u0166": | ||
case "\u0167": | ||
case '\u0162': | ||
case '\u0163': | ||
case '\u0164': | ||
case '\u0165': | ||
case '\u0166': | ||
case '\u0167': | ||
return 't'; | ||
case "\xD9": | ||
case "\xDA": | ||
case "\xDB": | ||
case "\xDC": | ||
case "\xF9": | ||
case "\xFA": | ||
case "\xFB": | ||
case "\xFC": | ||
case "\u0168": | ||
case "\u0169": | ||
case "\u016A": | ||
case "\u016B": | ||
case "\u016C": | ||
case "\u016D": | ||
case "\u016E": | ||
case "\u016F": | ||
case "\u0170": | ||
case "\u0171": | ||
case "\u0172": | ||
case "\u0173": | ||
case '\u00D9': | ||
case '\u00DA': | ||
case '\u00DB': | ||
case '\u00DC': | ||
case '\u00F9': | ||
case '\u00FA': | ||
case '\u00FB': | ||
case '\u00FC': | ||
case '\u0168': | ||
case '\u0169': | ||
case '\u016A': | ||
case '\u016B': | ||
case '\u016C': | ||
case '\u016D': | ||
case '\u016E': | ||
case '\u016F': | ||
case '\u0170': | ||
case '\u0171': | ||
case '\u0172': | ||
case '\u0173': | ||
return 'u'; | ||
case "\u0174": | ||
case "\u0175": | ||
case '\u0174': | ||
case '\u0175': | ||
return 'w'; | ||
case "\xDD": | ||
case "\xFD": | ||
case "\xFF": | ||
case "\u0176": | ||
case "\u0177": | ||
case "\u0178": | ||
case '\u00DD': | ||
case '\u00FD': | ||
case '\u00FF': | ||
case '\u0176': | ||
case '\u0177': | ||
case '\u0178': | ||
return 'y'; | ||
case "\u0179": | ||
case "\u017A": | ||
case "\u017B": | ||
case "\u017C": | ||
case "\u017D": | ||
case "\u017E": | ||
case '\u0179': | ||
case '\u017A': | ||
case '\u017B': | ||
case '\u017C': | ||
case '\u017D': | ||
case '\u017E': | ||
return 'z'; | ||
@@ -227,7 +227,7 @@ | ||
var NonAscii = /[\u0080-\uffff]/g; // Punctuation regex built using http://www.unicode.org/Public/UNIDATA/UnicodeData.txt | ||
const NonAscii = /[\u0080-\uffff]/g; // Punctuation regex built using http://www.unicode.org/Public/UNIDATA/UnicodeData.txt | ||
var PunctuationControl = /(?:[\0-\x08\n-\x1F\x7F]|(?:[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]))+/g; // by the time we're searching for whitespace 
we've already nixed control chars | ||
const PunctuationControl = /(?:[\0-\x08\n-\x1F\x7F]|[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65\u{10100}-\u{10102}\u{1039F}\u{103D0}\u{1056F}\u{10857}\u{1091F}\u{1093F}\u{10A50}-\u{10A58}\u{10A7F}\u{10AF0}-\u{10AF6}\u{10B39}-\u{10B3F}\u{10B99}-\u{10B9C}\u{10F55}-\u{10F59}\u{11047}-\u{1104D}\u{110BB}\u{110BC}\u{110BE}-\u{110C1}\u{11140}-\u{11143}\u{11174}\u{11175}\u{111C5}-\u{111C8}\u{111CD}\u{111DB}\u{111DD}-\u{111DF}\u{11238}-\u{1123D}\u{112A9}\u{1144B}-\u{1144F}\u{1145B}\u{1145D}\u{114C6}\u{115C1}-\u{115D7}\u{11641}-\u{11643}\u{11660}-\u{1166C}\u{1173C}-\u{1173E}\u{1183B}\u{11A3F}-\u{11A46}\u{11A9A}-\u{11A9C}\u{11A9E}-\u{11AA2}\u{11C41}-\u{11C45}\u{11C70}\u{11C71}\u{11EF7}\u{11EF8}\u{12470}-\u{12474}\u{16A6E}\u{16A6F}\u{16AF5}\u{16B37}-\u{16B3B}\u{16B44}\u{16E97}
-\u{16E9A}\u{1BC9F}\u{1DA87}-\u{1DA8B}\u{1E95E}\u{1E95F}])+/ug; // by the time we're searching for whitespace we've already nixed control chars | ||
var Whitespace = /[\x00-\x20]+/g; | ||
const Whitespace = /[\x00-\x20]+/g; | ||
@@ -239,32 +239,15 @@ function doFingerprint(s) { | ||
s = s.replace(PunctuationControl, ''); | ||
var tokens = s.split(Whitespace); | ||
const tokens = s.split(Whitespace); | ||
tokens.sort(); | ||
var uniqueTokens = []; | ||
var lastToken = null; | ||
var _iteratorNormalCompletion = true; | ||
var _didIteratorError = false; | ||
var _iteratorError = undefined; | ||
const uniqueTokens = []; | ||
let lastToken = null; | ||
const nTokens = tokens.length; | ||
try { | ||
for (var _iterator = tokens[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { | ||
var token = _step.value; | ||
for (let i = 0; i < nTokens; i++) { | ||
const token = tokens[i]; | ||
if (token !== lastToken) { | ||
uniqueTokens.push(token); | ||
lastToken = token; | ||
} | ||
if (token !== lastToken) { | ||
uniqueTokens.push(token); | ||
lastToken = token; | ||
} | ||
} catch (err) { | ||
_didIteratorError = true; | ||
_iteratorError = err; | ||
} finally { | ||
try { | ||
if (!_iteratorNormalCompletion && _iterator.return != null) { | ||
_iterator.return(); | ||
} | ||
} finally { | ||
if (_didIteratorError) { | ||
throw _iteratorError; | ||
} | ||
} | ||
} | ||
@@ -271,0 +254,0 @@ |
@@ -106,7 +106,5 @@ var jsLevenshtein = (function() | ||
var ret = function ret() { | ||
return jsLevenshtein; | ||
}; | ||
const ret = () => jsLevenshtein; | ||
export default ret; | ||
//# sourceMappingURL=levenshtein.js.map |
{ | ||
"name": "clustring", | ||
"version": "0.0.6", | ||
"version": "0.0.7", | ||
"description": "Algorithms for clustering strings", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -66,3 +66,3 @@ clustring | ||
.then(bins => { ... }) | ||
// bins will be same as in previous example. | ||
// bins will be same as in previous example, minus "key" | ||
``` | ||
@@ -69,0 +69,0 @@ |
@@ -29,8 +29,9 @@ import { tick } from '../util' | ||
for (const str of strs) { | ||
i += 1 | ||
if ((i & nIterationsBetweenTickChecks) === 0) { | ||
for (let i = 0; i < strs.length; i++) { | ||
const str = strs[i] | ||
if (((i + 1) & nIterationsBetweenTickChecks) === 0) { | ||
const t2 = new Date() | ||
if (t2 - t1 >= tickMs) { | ||
this.progress = (i - 1) / strs.length | ||
this.progress = i / strs.length | ||
@@ -79,9 +80,8 @@ await tick() | ||
export default function clusterByKey (bucket, keyer, options={}) { | ||
options = { | ||
options = Object.assign({ | ||
tickMs: 8, | ||
nIterationsBetweenTickChecks: 0xfff, // must be power of two, minus one | ||
...options | ||
} | ||
nIterationsBetweenTickChecks: 0xfff // must be power of two, minus one | ||
}, options) | ||
return new KeyClusterer(bucket, keyer, options) | ||
} |
@@ -227,3 +227,5 @@ function stripLatin1Accent (c) { | ||
let lastToken = null | ||
for (const token of tokens) { | ||
const nTokens = tokens.length | ||
for (let i = 0; i < nTokens; i++) { | ||
const token = tokens[i] | ||
if (token !== lastToken) { | ||
@@ -230,0 +232,0 @@ uniqueTokens.push(token) |
@@ -83,9 +83,8 @@ import { tick } from '../util' | ||
export default function clusterByKnn (bucket, distance, radius, options={}) { | ||
options = { | ||
options = Object.assign({ | ||
tickMs: 8, | ||
nIterationsBetweenTickChecks: 0xfff, // must be power of two, minus one | ||
...options | ||
} | ||
nIterationsBetweenTickChecks: 0xfff // must be power of two, minus one | ||
}, options) | ||
return new KnnClusterer(bucket, distance, radius, options) | ||
} |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
93882
1136