Socket
Socket
Sign in · Demo · Install

efrt

Package Overview
Dependencies
Maintainers
1
Versions
24
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

efrt - npm Package Compare versions

Comparing version 0.0.7 to 1.0.0

198

builds/efrt-unpack.es5.js

@@ -58,150 +58,21 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.unpack = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){

var Ptrie = _dereq_('./ptrie');
var unpack = _dereq_('./unpack');
module.exports = function (str) {
return new Ptrie(str);
};
},{"./ptrie":5}],3:[function(_dereq_,module,exports){
'use strict';
var encoding = _dereq_('../encoding');
var isPrefix = _dereq_('./prefix');
var unravel = _dereq_('./unravel');
var methods = {
// Return largest matching string in the dictionary (or '')
has: function has(want) {
//fail-fast
if (!want) {
return false;
module.exports = function (obj) {
if (typeof obj === 'string') {
obj = JSON.parse(obj); //weee!
}
var all = {};
Object.keys(obj).forEach(function (cat) {
var arr = unpack(obj[cat]);
for (var i = 0; i < arr.length; i++) {
all[arr[i]] = cat;
}
//then, try cache-lookup
if (this._cache) {
if (this._cache.hasOwnProperty(want) === true) {
return this._cache[want];
}
return false;
}
var self = this;
var crawl = function crawl(index, prefix) {
var node = self.nodes[index];
//the '!' means a prefix-alone is a good match
if (node[0] === '!') {
//try to match the prefix (the last branch)
if (prefix === want) {
return true;
}
node = node.slice(1); //ok, we tried. remove it.
}
//each possible match on this line is something like 'me,me2,me4'.
//try each one
var matches = node.split(/([A-Z0-9,]+)/g);
for (var i = 0; i < matches.length; i += 2) {
var str = matches[i];
var ref = matches[i + 1];
if (!str) {
continue;
}
var have = prefix + str;
//we're at the branch's end, so try to match it
if (ref === ',' || ref === undefined) {
if (have === want) {
return true;
}
continue;
}
//ok, not a match.
//well, should we keep going on this branch?
//if we do, we ignore all the others here.
if (isPrefix(have, want)) {
index = self.indexFromRef(ref, index);
return crawl(index, have);
}
//nah, lets try the next branch..
continue;
}
return false;
};
return crawl(0, '');
},
// References are either absolute (symbol) or relative (1 - based)
indexFromRef: function indexFromRef(ref, index) {
var dnode = encoding.fromAlphaCode(ref);
if (dnode < this.symCount) {
return this.syms[dnode];
}
return index + dnode + 1 - this.symCount;
},
toArray: function toArray() {
return Object.keys(this.toObject());
},
toObject: function toObject() {
if (this._cache) {
return this._cache;
}
return unravel(this);
},
cache: function cache() {
this._cache = unravel(this);
this.nodes = null;
this.syms = null;
}
});
return all;
};
module.exports = methods;
},{"../encoding":1,"./prefix":4,"./unravel":7}],4:[function(_dereq_,module,exports){
},{"./unpack":4}],3:[function(_dereq_,module,exports){
'use strict';
//are we on the right path with this string?
module.exports = function (str, want) {
//allow perfect equals
if (str === want) {
return true;
}
//compare lengths
var len = str.length;
if (len >= want.length) {
return false;
}
//quick slice
if (len === 1) {
return str === want[0];
}
return want.slice(0, len) === str;
};
// console.log(module.exports('harvar', 'harvard'));
},{}],5:[function(_dereq_,module,exports){
'use strict';
var parseSymbols = _dereq_('./symbols');
var methods = _dereq_('./methods');
//PackedTrie - Trie traversal of the Trie packed-string representation.
// Constructor: splits the packed string on ';' into node lines and sets up
// empty symbol bookkeeping. When the input contains a ':' the instance is
// handed to parseSymbols.
var PackedTrie = function PackedTrie(str) {
  var lines = str.split(';');
  var hasSymbolTable = str.match(':') !== null;
  this.nodes = lines;
  this.syms = [];
  this.symCount = 0;
  this._cache = null;
  if (hasSymbolTable) {
    parseSymbols(this);
  }
};
Object.keys(methods).forEach(function (k) {
PackedTrie.prototype[k] = methods[k];
});
module.exports = PackedTrie;
},{"./methods":3,"./symbols":6}],6:[function(_dereq_,module,exports){
'use strict';
var encoding = _dereq_('../encoding');

@@ -225,12 +96,23 @@

},{"../encoding":1}],7:[function(_dereq_,module,exports){
},{"../encoding":1}],4:[function(_dereq_,module,exports){
'use strict';
//spin-out all words from this trie
module.exports = function (trie) {
var all = {};
var parseSymbols = _dereq_('./symbols');
var encoding = _dereq_('../encoding');
// References are either absolute (symbol) or relative (1 - based)
// Turn a reference string into a node index. The reference is decoded with
// encoding.fromAlphaCode; values below trie.symCount are looked up in
// trie.syms, anything else is an offset from the current index.
var indexFromRef = function indexFromRef(trie, ref, index) {
  var decoded = encoding.fromAlphaCode(ref);
  var isSymbol = decoded < trie.symCount;
  return isSymbol ? trie.syms[decoded] : index + decoded + 1 - trie.symCount;
};
var toArray = function toArray(trie) {
var all = [];
var crawl = function crawl(index, pref) {
var node = trie.nodes[index];
if (node[0] === '!') {
all[pref] = true;
all.push(pref);
node = node.slice(1); //ok, we tried. remove it.

@@ -249,6 +131,6 @@ }

if (ref === ',' || ref === undefined) {
all[have] = true;
all.push(have);
continue;
}
var newIndex = trie.indexFromRef(ref, index);
var newIndex = indexFromRef(trie, ref, index);
crawl(newIndex, have);

@@ -261,3 +143,19 @@ }

},{}]},{},[2])(2)
//PackedTrie - Trie traversal of the Trie packed-string representation.
// Build a bare trie record from a packed string (node lines split on ';'),
// let parseSymbols process it when the string contains a ':', and return
// whatever toArray produces for it.
var unpack = function unpack(str) {
  var trie = { nodes: str.split(';'), syms: [], symCount: 0 };
  var hasSymbols = str.match(':') !== null;
  if (hasSymbols) {
    parseSymbols(trie);
  }
  return toArray(trie);
};
module.exports = unpack;
},{"../encoding":1,"./symbols":3}]},{},[2])(2)
});

@@ -1,2 +0,2 @@

/* efrt trie-compression v0.0.7 github.com/nlp-compromise/efrt - MIT */
/* efrt trie-compression v1.0.0 github.com/nlp-compromise/efrt - MIT */
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.unpack = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){

@@ -60,146 +60,20 @@ 'use strict';

'use strict';
const Ptrie = _dereq_('./ptrie');
const unpack = _dereq_('./unpack');
module.exports = function(str) {
return new Ptrie(str);
};
},{"./ptrie":5}],3:[function(_dereq_,module,exports){
'use strict';
const encoding = _dereq_('../encoding');
const isPrefix = _dereq_('./prefix');
const unravel = _dereq_('./unravel');
const methods = {
// Return largest matching string in the dictionary (or '')
has: function(want) {
//fail-fast
if (!want) {
return false;
module.exports = function(obj) {
if (typeof obj === 'string') {
obj = JSON.parse(obj); //weee!
}
let all = {};
Object.keys(obj).forEach(function(cat) {
let arr = unpack(obj[cat]);
for (var i = 0; i < arr.length; i++) {
all[arr[i]] = cat;
}
//then, try cache-lookup
if (this._cache) {
if (this._cache.hasOwnProperty(want) === true) {
return this._cache[want];
}
return false;
}
let self = this;
const crawl = function(index, prefix) {
let node = self.nodes[index];
//the '!' means a prefix-alone is a good match
if (node[0] === '!') {
//try to match the prefix (the last branch)
if (prefix === want) {
return true;
}
node = node.slice(1); //ok, we tried. remove it.
}
//each possible match on this line is something like 'me,me2,me4'.
//try each one
const matches = node.split(/([A-Z0-9,]+)/g);
for (let i = 0; i < matches.length; i += 2) {
const str = matches[i];
const ref = matches[i + 1];
if (!str) {
continue;
}
const have = prefix + str;
//we're at the branch's end, so try to match it
if (ref === ',' || ref === undefined) {
if (have === want) {
return true;
}
continue;
}
//ok, not a match.
//well, should we keep going on this branch?
//if we do, we ignore all the others here.
if (isPrefix(have, want)) {
index = self.indexFromRef(ref, index);
return crawl(index, have);
}
//nah, lets try the next branch..
continue;
}
return false;
};
return crawl(0, '');
},
// References are either absolute (symbol) or relative (1 - based)
indexFromRef: function(ref, index) {
const dnode = encoding.fromAlphaCode(ref);
if (dnode < this.symCount) {
return this.syms[dnode];
}
return index + dnode + 1 - this.symCount;
},
toArray: function() {
return Object.keys(this.toObject());
},
toObject: function() {
if (this._cache) {
return this._cache;
}
return unravel(this);
},
cache: function() {
this._cache = unravel(this);
this.nodes = null;
this.syms = null;
}
});
return all;
};
module.exports = methods;
},{"../encoding":1,"./prefix":4,"./unravel":7}],4:[function(_dereq_,module,exports){
},{"./unpack":4}],3:[function(_dereq_,module,exports){
'use strict';
//are we on the right path with this string?
module.exports = function(str, want) {
//allow perfect equals
if (str === want) {
return true;
}
//compare lengths
let len = str.length;
if (len >= want.length) {
return false;
}
//quick slice
if (len === 1) {
return str === want[0];
}
return want.slice(0, len) === str;
};
// console.log(module.exports('harvar', 'harvard'));
},{}],5:[function(_dereq_,module,exports){
'use strict';
const parseSymbols = _dereq_('./symbols');
const methods = _dereq_('./methods');
//PackedTrie - Trie traversal of the Trie packed-string representation.
// Constructor: node lines come from splitting the packed string on ';'.
// Symbol bookkeeping starts empty and parseSymbols is invoked on the
// instance only when the input contains a ':'.
const PackedTrie = function(str) {
  const lines = str.split(';');
  const hasSymbolTable = str.match(':') !== null;
  this.nodes = lines;
  this.syms = [];
  this.symCount = 0;
  this._cache = null;
  if (hasSymbolTable) {
    parseSymbols(this);
  }
};
Object.keys(methods).forEach(function(k) {
PackedTrie.prototype[k] = methods[k];
});
module.exports = PackedTrie;
},{"./methods":3,"./symbols":6}],6:[function(_dereq_,module,exports){
'use strict';
const encoding = _dereq_('../encoding');

@@ -223,11 +97,22 @@

},{"../encoding":1}],7:[function(_dereq_,module,exports){
},{"../encoding":1}],4:[function(_dereq_,module,exports){
'use strict';
//spin-out all words from this trie
module.exports = function(trie) {
let all = {};
const crawl = function(index, pref) {
const parseSymbols = _dereq_('./symbols');
const encoding = _dereq_('../encoding');
// References are either absolute (symbol) or relative (1 - based)
// Resolve a reference to a node index: decode it with
// encoding.fromAlphaCode, then either look it up in trie.syms (when below
// trie.symCount) or treat it as an offset from `index`.
const indexFromRef = (trie, ref, index) => {
  const decoded = encoding.fromAlphaCode(ref);
  return decoded < trie.symCount
    ? trie.syms[decoded]
    : index + decoded + 1 - trie.symCount;
};
const toArray = function(trie) {
let all = [];
const crawl = (index, pref) => {
let node = trie.nodes[index];
if (node[0] === '!') {
all[pref] = true;
all.push(pref);
node = node.slice(1); //ok, we tried. remove it.

@@ -246,6 +131,6 @@ }

if (ref === ',' || ref === undefined) {
all[have] = true;
all.push(have);
continue;
}
let newIndex = trie.indexFromRef(ref, index);
let newIndex = indexFromRef(trie, ref, index);
crawl(newIndex, have);

@@ -258,3 +143,19 @@ }

},{}]},{},[2])(2)
//PackedTrie - Trie traversal of the Trie packed-string representation.
// Create a plain trie record from the packed string, run parseSymbols on it
// when the string contains a ':', and return the result of toArray.
const unpack = function(str) {
  const trie = { nodes: str.split(';'), syms: [], symCount: 0 };
  const hasSymbols = str.match(':') !== null;
  if (hasSymbols) {
    parseSymbols(trie);
  }
  return toArray(trie);
};
module.exports = unpack;
},{"../encoding":1,"./symbols":3}]},{},[2])(2)
});

@@ -1,2 +0,2 @@

/* efrt trie-compression v0.0.7 github.com/nlp-compromise/efrt - MIT */
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var t;t="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,t.unpack=e()}}(function(){return function e(t,n,r){function o(s,f){if(!n[s]){if(!t[s]){var u="function"==typeof require&&require;if(!f&&u)return u(s,!0);if(i)return i(s,!0);var c=new Error("Cannot find module '"+s+"'");throw c.code="MODULE_NOT_FOUND",c}var a=n[s]={exports:{}};t[s][0].call(a.exports,function(e){var n=t[s][1][e];return o(n?n:e)},a,a.exports,e,t,n,r)}return n[s].exports}for(var i="function"==typeof require&&require,s=0;s<r.length;s++)o(r[s]);return o}({1:[function(e,t,n){"use strict";var r=36,o="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",i=o.split("").reduce(function(e,t,n){return e[t]=n,e},{}),s=function(e){if(void 0!==o[e])return o[e];for(var t=1,n=r,i="";e>=n;e-=n,t++,n*=r);for(;t--;){var s=e%r;i=String.fromCharCode((s<10?48:55)+s)+i,e=(e-s)/r}return i},f=function(e){if(void 0!==i[e])return i[e];for(var t=0,n=1,o=r,s=1;n<e.length;t+=o,n++,o*=r);for(var f=e.length-1;f>=0;f--,s*=r){var u=e.charCodeAt(f)-48;u>10&&(u-=7),t+=u*s}return t};t.exports={toAlphaCode:s,fromAlphaCode:f}},{}],2:[function(e,t,n){"use strict";var r=e("./ptrie");t.exports=function(e){return new r(e)}},{"./ptrie":5}],3:[function(e,t,n){"use strict";var r=e("../encoding"),o=e("./prefix"),i=e("./unravel"),s={has:function(e){if(!e)return!1;if(this._cache)return this._cache.hasOwnProperty(e)===!0&&this._cache[e];var t=this,n=function n(r,i){var s=t.nodes[r];if("!"===s[0]){if(i===e)return!0;s=s.slice(1)}for(var f=s.split(/([A-Z0-9,]+)/g),u=0;u<f.length;u+=2){var c=f[u],a=f[u+1];if(c){var h=i+c;if(","!==a&&void 0!==a){if(o(h,e))return r=t.indexFromRef(a,r),n(r,h)}else if(h===e)return!0}}return!1};return n(0,"")},indexFromRef:function(e,t){var n=r.fromAlphaCode(e);return 
n<this.symCount?this.syms[n]:t+n+1-this.symCount},toArray:function(){return Object.keys(this.toObject())},toObject:function(){return this._cache?this._cache:i(this)},cache:function(){this._cache=i(this),this.nodes=null,this.syms=null}};t.exports=s},{"../encoding":1,"./prefix":4,"./unravel":7}],4:[function(e,t,n){"use strict";t.exports=function(e,t){if(e===t)return!0;var n=e.length;return!(n>=t.length)&&(1===n?e===t[0]:t.slice(0,n)===e)}},{}],5:[function(e,t,n){"use strict";var r=e("./symbols"),o=e("./methods"),i=function(e){this.nodes=e.split(";"),this.syms=[],this.symCount=0,this._cache=null,e.match(":")&&r(this)};Object.keys(o).forEach(function(e){i.prototype[e]=o[e]}),t.exports=i},{"./methods":3,"./symbols":6}],6:[function(e,t,n){"use strict";var r=e("../encoding");t.exports=function(e){for(var t=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),n=0;n<e.nodes.length;n++){var o=t.exec(e.nodes[n]);if(!o){e.symCount=n;break}e.syms[r.fromAlphaCode(o[1])]=r.fromAlphaCode(o[2])}e.nodes=e.nodes.slice(e.symCount,e.nodes.length)}},{"../encoding":1}],7:[function(e,t,n){"use strict";t.exports=function(e){var t={},n=function n(r,o){var i=e.nodes[r];"!"===i[0]&&(t[o]=!0,i=i.slice(1));for(var s=i.split(/([A-Z0-9,]+)/g),f=0;f<s.length;f+=2){var u=s[f],c=s[f+1];if(u){var a=o+u;if(","!==c&&void 0!==c){var h=e.indexFromRef(c,r);n(h,a)}else t[a]=!0}}};return n(0,""),t}},{}]},{},[2])(2)});
/* efrt trie-compression v1.0.0 github.com/nlp-compromise/efrt - MIT */
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var n;n="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,n.unpack=e()}}(function(){return function e(n,r,o){function t(i,u){if(!r[i]){if(!n[i]){var s="function"==typeof require&&require;if(!u&&s)return s(i,!0);if(f)return f(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var c=r[i]={exports:{}};n[i][0].call(c.exports,function(e){var r=n[i][1][e];return t(r?r:e)},c,c.exports,e,n,r,o)}return r[i].exports}for(var f="function"==typeof require&&require,i=0;i<o.length;i++)t(o[i]);return t}({1:[function(e,n,r){"use strict";var o=36,t="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",f=t.split("").reduce(function(e,n,r){return e[n]=r,e},{}),i=function(e){if(void 0!==t[e])return t[e];for(var n=1,r=o,f="";e>=r;e-=r,n++,r*=o);for(;n--;){var i=e%o;f=String.fromCharCode((i<10?48:55)+i)+f,e=(e-i)/o}return f},u=function(e){if(void 0!==f[e])return f[e];for(var n=0,r=1,t=o,i=1;r<e.length;n+=t,r++,t*=o);for(var u=e.length-1;u>=0;u--,i*=o){var s=e.charCodeAt(u)-48;s>10&&(s-=7),n+=s*i}return n};n.exports={toAlphaCode:i,fromAlphaCode:u}},{}],2:[function(e,n,r){"use strict";var o=e("./unpack");n.exports=function(e){"string"==typeof e&&(e=JSON.parse(e));var n={};return Object.keys(e).forEach(function(r){for(var t=o(e[r]),f=0;f<t.length;f++)n[t[f]]=r}),n}},{"./unpack":4}],3:[function(e,n,r){"use strict";var o=e("../encoding");n.exports=function(e){for(var n=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),r=0;r<e.nodes.length;r++){var t=n.exec(e.nodes[r]);if(!t){e.symCount=r;break}e.syms[o.fromAlphaCode(t[1])]=o.fromAlphaCode(t[2])}e.nodes=e.nodes.slice(e.symCount,e.nodes.length)}},{"../encoding":1}],4:[function(e,n,r){"use strict";var o=e("./symbols"),t=e("../encoding"),f=function(e,n,r){var o=t.fromAlphaCode(n);return 
o<e.symCount?e.syms[o]:r+o+1-e.symCount},i=function(e){var n=[],r=function r(o,t){var i=e.nodes[o];"!"===i[0]&&(n.push(t),i=i.slice(1));for(var u=i.split(/([A-Z0-9,]+)/g),s=0;s<u.length;s+=2){var a=u[s],c=u[s+1];if(a){var d=t+a;if(","!==c&&void 0!==c){var p=f(e,c,o);r(p,d)}else n.push(d)}}};return r(0,""),n},u=function(e){var n={nodes:e.split(";"),syms:[],symCount:0};return e.match(":")&&o(n),i(n)};n.exports=u},{"../encoding":1,"./symbols":3}]},{},[2])(2)});

@@ -1,17 +0,5 @@

/* efrt trie-compression v0.0.7 github.com/nlp-compromise/efrt - MIT */
/* efrt trie-compression v1.0.0 github.com/nlp-compromise/efrt - MIT */
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.efrt = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){
'use strict';
module.exports = {
NODE_SEP: ';',
STRING_SEP: ',',
TERMINAL_PREFIX: '!',
//characters banned from entering the trie
NOT_ALLOWED: new RegExp('[0-9A-Z,;!]'),
BASE: 36
};
},{}],2:[function(_dereq_,module,exports){
'use strict';
var BASE = 36;

@@ -68,3 +56,3 @@

},{}],3:[function(_dereq_,module,exports){
},{}],2:[function(_dereq_,module,exports){
(function (global){

@@ -96,3 +84,3 @@ 'use strict';

}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"./pack/index":6,"./unpack/index":10}],4:[function(_dereq_,module,exports){
},{"./pack/index":5,"./unpack/index":9}],3:[function(_dereq_,module,exports){
'use strict';

@@ -127,72 +115,48 @@

},{}],5:[function(_dereq_,module,exports){
},{}],4:[function(_dereq_,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
var Histogram = function Histogram() {
this.counts = {};
};
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var Histogram = function () {
function Histogram() {
_classCallCheck(this, Histogram);
this.counts = {};
}
_createClass(Histogram, [{
key: 'init',
value: function init(sym) {
if (this.counts[sym] === undefined) {
this.counts[sym] = 0;
}
var methods = {
init: function init(sym) {
if (this.counts[sym] === undefined) {
this.counts[sym] = 0;
}
}, {
key: 'add',
value: function add(sym, n) {
if (n === undefined) {
n = 1;
}
this.init(sym);
this.counts[sym] += n;
},
add: function add(sym, n) {
if (n === undefined) {
n = 1;
}
}, {
key: 'change',
value: function change(symNew, symOld, n) {
if (n === undefined) {
n = 1;
}
this.add(symOld, -n);
this.add(symNew, n);
this.init(sym);
this.counts[sym] += n;
},
countOf: function countOf(sym) {
this.init(sym);
return this.counts[sym];
},
highest: function highest(top) {
var sorted = [];
var keys = Object.keys(this.counts);
for (var i = 0; i < keys.length; i++) {
var sym = keys[i];
sorted.push([sym, this.counts[sym]]);
}
}, {
key: 'countOf',
value: function countOf(sym) {
this.init(sym);
return this.counts[sym];
sorted.sort(function (a, b) {
return b[1] - a[1];
});
if (top) {
sorted = sorted.slice(0, top);
}
}, {
key: 'highest',
value: function highest(top) {
var sorted = [];
var keys = Object.keys(this.counts);
for (var i = 0; i < keys.length; i++) {
var sym = keys[i];
sorted.push([sym, this.counts[sym]]);
}
sorted.sort(function (a, b) {
return b[1] - a[1];
});
if (top) {
sorted = sorted.slice(0, top);
}
return sorted;
}
}]);
return Histogram;
}();
return sorted;
}
};
Object.keys(methods).forEach(function (k) {
Histogram.prototype[k] = methods[k];
});
module.exports = Histogram;
},{}],6:[function(_dereq_,module,exports){
},{}],5:[function(_dereq_,module,exports){
'use strict';

@@ -202,10 +166,46 @@

// Normalise the accepted input shapes into a plain key/value object.
//   null / undefined -> {}
//   string           -> words separated by runs of spaces, each mapped to true
//   array            -> each element mapped to true
//   anything else    -> returned untouched
var handleFormats = function handleFormats(input) {
  // build a { word: true } lookup from a list of words
  var toSet = function toSet(words) {
    return words.reduce(function (h, str) {
      h[str] = true;
      return h;
    }, {});
  };
  //null
  if (input === null || input === undefined) {
    return {};
  }
  //string
  if (typeof input === 'string') {
    // splitting '' or a string with leading/trailing spaces yields empty
    // tokens; drop them so an empty "word" never becomes a key
    var words = input.split(/ +/g).filter(function (word) {
      return word !== '';
    });
    return toSet(words);
  }
  //array
  if (Array.isArray(input)) {
    return toSet(input);
  }
  //object
  return input;
};
//turn an array into a compressed string
var pack = function pack(arr) {
var t = new Trie(arr);
return t.pack();
var pack = function pack(obj) {
obj = handleFormats(obj);
//pivot into categories:
var flat = Object.keys(obj).reduce(function (h, k) {
var val = obj[k];
h[val] = h[val] || [];
h[val].push(k);
return h;
}, {});
//pack each into a compressed string
Object.keys(flat).forEach(function (k) {
var t = new Trie(flat[k]);
flat[k] = t.pack();
});
flat = JSON.stringify(flat, null, 0);
return flat;
};
module.exports = pack;
},{"./trie":9}],7:[function(_dereq_,module,exports){
},{"./trie":8}],6:[function(_dereq_,module,exports){
'use strict';

@@ -217,3 +217,3 @@

var _pack = _dereq_('./pack');
var config = _dereq_('../config');
var NOT_ALLOWED = new RegExp('[0-9A-Z,;!]'); //characters banned from entering the trie

@@ -234,3 +234,3 @@ module.exports = {

for (var _i = 0; _i < words.length; _i++) {
if (words[_i].match(config.NOT_ALLOWED) === null) {
if (words[_i].match(NOT_ALLOWED) === null) {
this.insert(words[_i]);

@@ -427,6 +427,2 @@ }

has: function has(word) {
return this.isFragment(word, this.root);
},
isTerminal: function isTerminal(node) {

@@ -436,24 +432,2 @@ return !!node[''];

isFragment: function isFragment(word, node) {
if (word.length === 0) {
return this.isTerminal(node);
}
if (node[word] === 1) {
return true;
}
// Find a prefix of word reference to a child
var props = this.nodeProps(node, true);
for (var i = 0; i < props.length; i++) {
var prop = props[i];
if (prop === word.slice(0, prop.length)) {
return this.isFragment(word.slice(prop.length), node[prop]);
}
}
return false;
},
// Find highest node in Trie that is on the path to word

@@ -480,8 +454,13 @@ // and that is NOT on the path to other.

},{"../config":1,"./fns":4,"./pack":8}],8:[function(_dereq_,module,exports){
},{"./fns":3,"./pack":7}],7:[function(_dereq_,module,exports){
'use strict';
var Histogram = _dereq_('./histogram');
var config = _dereq_('../config');
var encoding = _dereq_('../encoding');
var config = {
NODE_SEP: ';',
STRING_SEP: ',',
TERMINAL_PREFIX: '!',
BASE: 36
};

@@ -519,3 +498,2 @@ // Return packed representation of Trie as a string.

var nodeLine = function nodeLine(self, node) {

@@ -642,3 +620,3 @@ var line = '',

},{"../config":1,"../encoding":2,"./histogram":5}],9:[function(_dereq_,module,exports){
},{"../encoding":1,"./histogram":4}],8:[function(_dereq_,module,exports){
'use strict';

@@ -679,153 +657,24 @@

},{"./methods":7}],10:[function(_dereq_,module,exports){
},{"./methods":6}],9:[function(_dereq_,module,exports){
'use strict';
var Ptrie = _dereq_('./ptrie');
var unpack = _dereq_('./unpack');
module.exports = function (str) {
return new Ptrie(str);
};
},{"./ptrie":13}],11:[function(_dereq_,module,exports){
'use strict';
var encoding = _dereq_('../encoding');
var isPrefix = _dereq_('./prefix');
var unravel = _dereq_('./unravel');
var methods = {
// Return largest matching string in the dictionary (or '')
has: function has(want) {
//fail-fast
if (!want) {
return false;
module.exports = function (obj) {
if (typeof obj === 'string') {
obj = JSON.parse(obj); //weee!
}
var all = {};
Object.keys(obj).forEach(function (cat) {
var arr = unpack(obj[cat]);
for (var i = 0; i < arr.length; i++) {
all[arr[i]] = cat;
}
//then, try cache-lookup
if (this._cache) {
if (this._cache.hasOwnProperty(want) === true) {
return this._cache[want];
}
return false;
}
var self = this;
var crawl = function crawl(index, prefix) {
var node = self.nodes[index];
//the '!' means a prefix-alone is a good match
if (node[0] === '!') {
//try to match the prefix (the last branch)
if (prefix === want) {
return true;
}
node = node.slice(1); //ok, we tried. remove it.
}
//each possible match on this line is something like 'me,me2,me4'.
//try each one
var matches = node.split(/([A-Z0-9,]+)/g);
for (var i = 0; i < matches.length; i += 2) {
var str = matches[i];
var ref = matches[i + 1];
if (!str) {
continue;
}
var have = prefix + str;
//we're at the branch's end, so try to match it
if (ref === ',' || ref === undefined) {
if (have === want) {
return true;
}
continue;
}
//ok, not a match.
//well, should we keep going on this branch?
//if we do, we ignore all the others here.
if (isPrefix(have, want)) {
index = self.indexFromRef(ref, index);
return crawl(index, have);
}
//nah, lets try the next branch..
continue;
}
return false;
};
return crawl(0, '');
},
// References are either absolute (symbol) or relative (1 - based)
indexFromRef: function indexFromRef(ref, index) {
var dnode = encoding.fromAlphaCode(ref);
if (dnode < this.symCount) {
return this.syms[dnode];
}
return index + dnode + 1 - this.symCount;
},
toArray: function toArray() {
return Object.keys(this.toObject());
},
toObject: function toObject() {
if (this._cache) {
return this._cache;
}
return unravel(this);
},
cache: function cache() {
this._cache = unravel(this);
this.nodes = null;
this.syms = null;
}
});
return all;
};
module.exports = methods;
},{"../encoding":2,"./prefix":12,"./unravel":15}],12:[function(_dereq_,module,exports){
},{"./unpack":11}],10:[function(_dereq_,module,exports){
'use strict';
//are we on the right path with this string?
module.exports = function (str, want) {
//allow perfect equals
if (str === want) {
return true;
}
//compare lengths
var len = str.length;
if (len >= want.length) {
return false;
}
//quick slice
if (len === 1) {
return str === want[0];
}
return want.slice(0, len) === str;
};
// console.log(module.exports('harvar', 'harvard'));
},{}],13:[function(_dereq_,module,exports){
'use strict';
var parseSymbols = _dereq_('./symbols');
var methods = _dereq_('./methods');
//PackedTrie - Trie traversal of the Trie packed-string representation.
// Constructor: stores the ';'-separated node lines of the packed string,
// initialises empty symbol state, and calls parseSymbols on the instance
// when the input contains a ':'.
var PackedTrie = function PackedTrie(str) {
  var lines = str.split(';');
  var hasSymbolTable = str.match(':') !== null;
  this.nodes = lines;
  this.syms = [];
  this.symCount = 0;
  this._cache = null;
  if (hasSymbolTable) {
    parseSymbols(this);
  }
};
Object.keys(methods).forEach(function (k) {
PackedTrie.prototype[k] = methods[k];
});
module.exports = PackedTrie;
},{"./methods":11,"./symbols":14}],14:[function(_dereq_,module,exports){
'use strict';
var encoding = _dereq_('../encoding');

@@ -849,12 +698,23 @@

},{"../encoding":2}],15:[function(_dereq_,module,exports){
},{"../encoding":1}],11:[function(_dereq_,module,exports){
'use strict';
//spin-out all words from this trie
module.exports = function (trie) {
var all = {};
var parseSymbols = _dereq_('./symbols');
var encoding = _dereq_('../encoding');
// References are either absolute (symbol) or relative (1 - based)
// Map a reference string to a node index. After decoding with
// encoding.fromAlphaCode, values below trie.symCount index into trie.syms;
// all others are applied as an offset from `index`.
var indexFromRef = function indexFromRef(trie, ref, index) {
  var decoded = encoding.fromAlphaCode(ref);
  var isSymbol = decoded < trie.symCount;
  return isSymbol ? trie.syms[decoded] : index + decoded + 1 - trie.symCount;
};
var toArray = function toArray(trie) {
var all = [];
var crawl = function crawl(index, pref) {
var node = trie.nodes[index];
if (node[0] === '!') {
all[pref] = true;
all.push(pref);
node = node.slice(1); //ok, we tried. remove it.

@@ -873,6 +733,6 @@ }

if (ref === ',' || ref === undefined) {
all[have] = true;
all.push(have);
continue;
}
var newIndex = trie.indexFromRef(ref, index);
var newIndex = indexFromRef(trie, ref, index);
crawl(newIndex, have);

@@ -885,3 +745,19 @@ }

},{}]},{},[3])(3)
//PackedTrie - Trie traversal of the Trie packed-string representation.
// Assemble a trie record from the packed string (node lines split on ';'),
// pass it through parseSymbols when the string contains a ':', then return
// the value toArray gives back.
var unpack = function unpack(str) {
  var trie = { nodes: str.split(';'), syms: [], symCount: 0 };
  var hasSymbols = str.match(':') !== null;
  if (hasSymbols) {
    parseSymbols(trie);
  }
  return toArray(trie);
};
module.exports = unpack;
},{"../encoding":1,"./symbols":10}]},{},[2])(2)
});

@@ -1,2 +0,2 @@

/* efrt trie-compression v0.0.7 github.com/nlp-compromise/efrt - MIT */
!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{var n;n="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,n.efrt=t()}}(function(){var t;return function t(n,e,i){function o(s,u){if(!e[s]){if(!n[s]){var f="function"==typeof require&&require;if(!u&&f)return f(s,!0);if(r)return r(s,!0);var c=new Error("Cannot find module '"+s+"'");throw c.code="MODULE_NOT_FOUND",c}var h=e[s]={exports:{}};n[s][0].call(h.exports,function(t){var e=n[s][1][t];return o(e?e:t)},h,h.exports,t,n,e,i)}return e[s].exports}for(var r="function"==typeof require&&require,s=0;s<i.length;s++)o(i[s]);return o}({1:[function(t,n,e){"use strict";n.exports={NODE_SEP:";",STRING_SEP:",",TERMINAL_PREFIX:"!",NOT_ALLOWED:new RegExp("[0-9A-Z,;!]"),BASE:36}},{}],2:[function(t,n,e){"use strict";var i=36,o="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",r=o.split("").reduce(function(t,n,e){return t[n]=e,t},{}),s=function(t){if(void 0!==o[t])return o[t];for(var n=1,e=i,r="";t>=e;t-=e,n++,e*=i);for(;n--;){var s=t%i;r=String.fromCharCode((s<10?48:55)+s)+r,t=(t-s)/i}return r},u=function(t){if(void 0!==r[t])return r[t];for(var n=0,e=1,o=i,s=1;e<t.length;n+=o,e++,o*=i);for(var u=t.length-1;u>=0;u--,s*=i){var f=t.charCodeAt(u)-48;f>10&&(f-=7),n+=f*s}return n};n.exports={toAlphaCode:s,fromAlphaCode:u}},{}],3:[function(n,e,i){(function(i){"use strict";var o={pack:n("./pack/index"),unpack:n("./unpack/index")};"undefined"!=typeof self?self.efrt=o:"undefined"!=typeof window?window.efrt=o:"undefined"!=typeof i&&(i.efrt=o),"function"==typeof t&&t.amd&&t(o),"undefined"!=typeof e&&(e.exports=o)}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./pack/index":6,"./unpack/index":10}],4:[function(t,n,e){"use strict";var i=function(t,n){for(var e=Math.min(t.length,n.length);e>0;){var i=t.slice(0,e);if(i===n.slice(0,e))return 
i;e-=1}return""},o=function(t){t.sort();for(var n=1;n<t.length;n++)t[n-1]===t[n]&&t.splice(n,1)};n.exports={commonPrefix:i,unique:o}},{}],5:[function(t,n,e){"use strict";function i(t,n){if(!(t instanceof n))throw new TypeError("Cannot call a class as a function")}var o=function(){function t(t,n){for(var e=0;e<n.length;e++){var i=n[e];i.enumerable=i.enumerable||!1,i.configurable=!0,"value"in i&&(i.writable=!0),Object.defineProperty(t,i.key,i)}}return function(n,e,i){return e&&t(n.prototype,e),i&&t(n,i),n}}(),r=function(){function t(){i(this,t),this.counts={}}return o(t,[{key:"init",value:function(t){void 0===this.counts[t]&&(this.counts[t]=0)}},{key:"add",value:function(t,n){void 0===n&&(n=1),this.init(t),this.counts[t]+=n}},{key:"change",value:function(t,n,e){void 0===e&&(e=1),this.add(n,-e),this.add(t,e)}},{key:"countOf",value:function(t){return this.init(t),this.counts[t]}},{key:"highest",value:function(t){for(var n=[],e=Object.keys(this.counts),i=0;i<e.length;i++){var o=e[i];n.push([o,this.counts[o]])}return n.sort(function(t,n){return n[1]-t[1]}),t&&(n=n.slice(0,t)),n}}]),t}();n.exports=r},{}],6:[function(t,n,e){"use strict";var i=t("./trie"),o=function(t){var n=new i(t);return n.pack()};n.exports=o},{"./trie":9}],7:[function(t,n,e){"use strict";var i="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},o=t("./fns"),r=t("./pack"),s=t("../config");n.exports={insertWords:function(t){if(void 0!==t){"string"==typeof t&&(t=t.split(/[^a-zA-Z]+/));for(var n=0;n<t.length;n++)t[n]=t[n].toLowerCase();o.unique(t);for(var e=0;e<t.length;e++)null===t[e].match(s.NOT_ALLOWED)&&this.insert(t[e])}},insert:function(t){this._insert(t,this.root);var n=this.lastWord;this.lastWord=t;var e=o.commonPrefix(t,n);if(e!==n){var i=this.uniqueNode(n,t,this.root);i&&this.combineSuffixNode(i)}},_insert:function(t,n){var e=void 0,r=void 
0;if(0!==t.length){for(var s=Object.keys(n),u=0;u<s.length;u++){var f=s[u];if(e=o.commonPrefix(t,f),0!==e.length){if(f===e&&"object"===i(n[f]))return void this._insert(t.slice(e.length),n[f]);if(f===t&&"number"==typeof n[f])return;return r={},r[f.slice(e.length)]=n[f],this.addTerminal(r,t=t.slice(e.length)),delete n[f],n[e]=r,void this.wordCount++}}this.addTerminal(n,t),this.wordCount++}},addTerminal:function(t,n){if(n.length<=1)return void(t[n]=1);var e={};t[n[0]]=e,this.addTerminal(e,n.slice(1))},nodeProps:function(t,n){var e=[];for(var o in t)""!==o&&"_"!==o[0]&&(n&&"object"!==i(t[o])||e.push(o));return e.sort(),e},optimize:function(){this.combineSuffixNode(this.root),this.prepDFS(),this.countDegree(this.root),this.prepDFS(),this.collapseChains(this.root)},combineSuffixNode:function(t){if(t._c)return t;var n=[];this.isTerminal(t)&&n.push("!");for(var e=this.nodeProps(t),o=0;o<e.length;o++){var r=e[o];"object"===i(t[r])?(t[r]=this.combineSuffixNode(t[r]),n.push(r),n.push(t[r]._c)):n.push(r)}n=n.join("-");var s=this.suffixes[n];return s?s:(this.suffixes[n]=t,t._c=this.cNext++,t)},prepDFS:function(){this.vCur++},visited:function(t){return t._v===this.vCur||(t._v=this.vCur,!1)},countDegree:function(t){if(void 0===t._d&&(t._d=0),t._d++,!this.visited(t))for(var n=this.nodeProps(t,!0),e=0;e<n.length;e++)this.countDegree(t[n[e]])},collapseChains:function(t){var n=void 0,e=void 0,o=void 0,r=void 0;if(!this.visited(t)){for(e=this.nodeProps(t),r=0;r<e.length;r++)n=e[r],o=t[n],"object"===("undefined"==typeof o?"undefined":i(o))&&(this.collapseChains(o),void 0===o._g||1!==o._d&&1!==o._g.length||(delete t[n],n+=o._g,t[n]=o[o._g]));1!==e.length||this.isTerminal(t)||(t._g=n)}},has:function(t){return this.isFragment(t,this.root)},isTerminal:function(t){return!!t[""]},isFragment:function(t,n){if(0===t.length)return this.isTerminal(n);if(1===n[t])return!0;for(var e=this.nodeProps(n,!0),i=0;i<e.length;i++){var o=e[i];if(o===t.slice(0,o.length))return 
this.isFragment(t.slice(o.length),n[o])}return!1},uniqueNode:function(t,n,e){for(var i=this.nodeProps(e,!0),o=0;o<i.length;o++){var r=i[o];if(r===t.slice(0,r.length))return r!==n.slice(0,r.length)?e[r]:this.uniqueNode(t.slice(r.length),n.slice(r.length),e[r])}},pack:function(){return r(this)}}},{"../config":1,"./fns":4,"./pack":8}],8:[function(t,n,e){"use strict";var i=t("./histogram"),o=t("../config"),r=t("../encoding"),s=function(t,n){var e="",i="";t.isTerminal(n)&&(e+=o.TERMINAL_PREFIX);for(var s=t.nodeProps(n),u=0;u<s.length;u++){var f=s[u];if("number"!=typeof n[f])if(t.syms[n[f]._n])e+=i+f+t.syms[n[f]._n],i="";else{var c=r.toAlphaCode(n._n-n[f]._n-1+t.symCount);n[f]._g&&c.length>=n[f]._g.length&&1===n[n[f]._g]?(c=n[f]._g,e+=i+f+c,i=o.STRING_SEP):(e+=i+f+c,i="")}else e+=i+f,i=o.STRING_SEP}return e},u=function t(n,e){if(!n.visited(e))for(var i=n.nodeProps(e,!0),s=0;s<i.length;s++){var u=i[s],f=e._n-e[u]._n-1;f<o.BASE&&n.histRel.add(f),n.histAbs.add(e[u]._n,r.toAlphaCode(f).length-1),t(n,e[u])}},f=function(t){t.histAbs=t.histAbs.highest(o.BASE);var n=[];n[-1]=0;for(var e=0,i=0,s=3+r.toAlphaCode(t.nodeCount).length,u=0;u<o.BASE&&void 0!==t.histAbs[u];u++)n[u]=t.histAbs[u][1]-s-t.histRel.countOf(o.BASE-u-1)+n[u-1],n[u]>=e&&(e=n[u],i=u+1);return i},c=function t(n,e){if(void 0===e._n){for(var i=n.nodeProps(e,!0),o=0;o<i.length;o++)t(n,e[i[o]]);e._n=n.pos++,n.nodes.unshift(e)}},h=function(t){t.nodes=[],t.nodeCount=0,t.syms={},t.symCount=0,t.pos=0,t.optimize(),t.histAbs=new i,t.histRel=new i,c(t,t.root),t.nodeCount=t.nodes.length,t.prepDFS(),u(t,t.root),t.symCount=f(t);for(var n=0;n<t.symCount;n++)t.syms[t.histAbs[n][0]]=r.toAlphaCode(n);for(var e=0;e<t.nodeCount;e++)t.nodes[e]=s(t,t.nodes[e]);for(var h=t.symCount-1;h>=0;h--)t.nodes.unshift(r.toAlphaCode(h)+":"+r.toAlphaCode(t.nodeCount-t.histAbs[h][0]-1));return t.nodes.join(o.NODE_SEP)};n.exports=h},{"../config":1,"../encoding":2,"./histogram":5}],9:[function(t,n,e){"use strict";var 
i=t("./methods"),o=function(t){this.root={},this.lastWord="",this.suffixes={},this.suffixCounts={},this.cNext=1,this.wordCount=0,this.insertWords(t),this.vCur=0};Object.keys(i).forEach(function(t){o.prototype[t]=i[t]}),n.exports=o},{"./methods":7}],10:[function(t,n,e){"use strict";var i=t("./ptrie");n.exports=function(t){return new i(t)}},{"./ptrie":13}],11:[function(t,n,e){"use strict";var i=t("../encoding"),o=t("./prefix"),r=t("./unravel"),s={has:function(t){if(!t)return!1;if(this._cache)return this._cache.hasOwnProperty(t)===!0&&this._cache[t];var n=this,e=function e(i,r){var s=n.nodes[i];if("!"===s[0]){if(r===t)return!0;s=s.slice(1)}for(var u=s.split(/([A-Z0-9,]+)/g),f=0;f<u.length;f+=2){var c=u[f],h=u[f+1];if(c){var a=r+c;if(","!==h&&void 0!==h){if(o(a,t))return i=n.indexFromRef(h,i),e(i,a)}else if(a===t)return!0}}return!1};return e(0,"")},indexFromRef:function(t,n){var e=i.fromAlphaCode(t);return e<this.symCount?this.syms[e]:n+e+1-this.symCount},toArray:function(){return Object.keys(this.toObject())},toObject:function(){return this._cache?this._cache:r(this)},cache:function(){this._cache=r(this),this.nodes=null,this.syms=null}};n.exports=s},{"../encoding":2,"./prefix":12,"./unravel":15}],12:[function(t,n,e){"use strict";n.exports=function(t,n){if(t===n)return!0;var e=t.length;return!(e>=n.length)&&(1===e?t===n[0]:n.slice(0,e)===t)}},{}],13:[function(t,n,e){"use strict";var i=t("./symbols"),o=t("./methods"),r=function(t){this.nodes=t.split(";"),this.syms=[],this.symCount=0,this._cache=null,t.match(":")&&i(this)};Object.keys(o).forEach(function(t){r.prototype[t]=o[t]}),n.exports=r},{"./methods":11,"./symbols":14}],14:[function(t,n,e){"use strict";var i=t("../encoding");n.exports=function(t){for(var n=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),e=0;e<t.nodes.length;e++){var o=n.exec(t.nodes[e]);if(!o){t.symCount=e;break}t.syms[i.fromAlphaCode(o[1])]=i.fromAlphaCode(o[2])}t.nodes=t.nodes.slice(t.symCount,t.nodes.length)}},{"../encoding":2}],15:[function(t,n,e){"use 
strict";n.exports=function(t){var n={},e=function e(i,o){var r=t.nodes[i];"!"===r[0]&&(n[o]=!0,r=r.slice(1));for(var s=r.split(/([A-Z0-9,]+)/g),u=0;u<s.length;u+=2){var f=s[u],c=s[u+1];if(f){var h=o+f;if(","!==c&&void 0!==c){var a=t.indexFromRef(c,i);e(a,h)}else n[h]=!0}}};return e(0,""),n}},{}]},{},[3])(3)});
/* efrt trie-compression v1.0.0 github.com/nlp-compromise/efrt - MIT */
!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{var n;n="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,n.efrt=t()}}(function(){var t;return function t(n,e,o){function i(s,u){if(!e[s]){if(!n[s]){var f="function"==typeof require&&require;if(!u&&f)return f(s,!0);if(r)return r(s,!0);var c=new Error("Cannot find module '"+s+"'");throw c.code="MODULE_NOT_FOUND",c}var d=e[s]={exports:{}};n[s][0].call(d.exports,function(t){var e=n[s][1][t];return i(e?e:t)},d,d.exports,t,n,e,o)}return e[s].exports}for(var r="function"==typeof require&&require,s=0;s<o.length;s++)i(o[s]);return i}({1:[function(t,n,e){"use strict";var o=36,i="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",r=i.split("").reduce(function(t,n,e){return t[n]=e,t},{}),s=function(t){if(void 0!==i[t])return i[t];for(var n=1,e=o,r="";t>=e;t-=e,n++,e*=o);for(;n--;){var s=t%o;r=String.fromCharCode((s<10?48:55)+s)+r,t=(t-s)/o}return r},u=function(t){if(void 0!==r[t])return r[t];for(var n=0,e=1,i=o,s=1;e<t.length;n+=i,e++,i*=o);for(var u=t.length-1;u>=0;u--,s*=o){var f=t.charCodeAt(u)-48;f>10&&(f-=7),n+=f*s}return n};n.exports={toAlphaCode:s,fromAlphaCode:u}},{}],2:[function(n,e,o){(function(o){"use strict";var i={pack:n("./pack/index"),unpack:n("./unpack/index")};"undefined"!=typeof self?self.efrt=i:"undefined"!=typeof window?window.efrt=i:"undefined"!=typeof o&&(o.efrt=i),"function"==typeof t&&t.amd&&t(i),"undefined"!=typeof e&&(e.exports=i)}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./pack/index":5,"./unpack/index":9}],3:[function(t,n,e){"use strict";var o=function(t,n){for(var e=Math.min(t.length,n.length);e>0;){var o=t.slice(0,e);if(o===n.slice(0,e))return o;e-=1}return""},i=function(t){t.sort();for(var 
n=1;n<t.length;n++)t[n-1]===t[n]&&t.splice(n,1)};n.exports={commonPrefix:o,unique:i}},{}],4:[function(t,n,e){"use strict";var o=function(){this.counts={}},i={init:function(t){void 0===this.counts[t]&&(this.counts[t]=0)},add:function(t,n){void 0===n&&(n=1),this.init(t),this.counts[t]+=n},countOf:function(t){return this.init(t),this.counts[t]},highest:function(t){for(var n=[],e=Object.keys(this.counts),o=0;o<e.length;o++){var i=e[o];n.push([i,this.counts[i]])}return n.sort(function(t,n){return n[1]-t[1]}),t&&(n=n.slice(0,t)),n}};Object.keys(i).forEach(function(t){o.prototype[t]=i[t]}),n.exports=o},{}],5:[function(t,n,e){"use strict";var o=t("./trie"),i=function(t){return null===t||void 0===t?{}:"string"==typeof t?t.split(/ +/g).reduce(function(t,n){return t[n]=!0,t},{}):"[object Array]"===Object.prototype.toString.call(t)?t.reduce(function(t,n){return t[n]=!0,t},{}):t},r=function(t){t=i(t);var n=Object.keys(t).reduce(function(n,e){var o=t[e];return n[o]=n[o]||[],n[o].push(e),n},{});return Object.keys(n).forEach(function(t){var e=new o(n[t]);n[t]=e.pack()}),n=JSON.stringify(n,null,0)};n.exports=r},{"./trie":8}],6:[function(t,n,e){"use strict";var o="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},i=t("./fns"),r=t("./pack"),s=new RegExp("[0-9A-Z,;!]");n.exports={insertWords:function(t){if(void 0!==t){"string"==typeof t&&(t=t.split(/[^a-zA-Z]+/));for(var n=0;n<t.length;n++)t[n]=t[n].toLowerCase();i.unique(t);for(var e=0;e<t.length;e++)null===t[e].match(s)&&this.insert(t[e])}},insert:function(t){this._insert(t,this.root);var n=this.lastWord;this.lastWord=t;var e=i.commonPrefix(t,n);if(e!==n){var o=this.uniqueNode(n,t,this.root);o&&this.combineSuffixNode(o)}},_insert:function(t,n){var e=void 0,r=void 0;if(0!==t.length){for(var s=Object.keys(n),u=0;u<s.length;u++){var 
f=s[u];if(e=i.commonPrefix(t,f),0!==e.length){if(f===e&&"object"===o(n[f]))return void this._insert(t.slice(e.length),n[f]);if(f===t&&"number"==typeof n[f])return;return r={},r[f.slice(e.length)]=n[f],this.addTerminal(r,t=t.slice(e.length)),delete n[f],n[e]=r,void this.wordCount++}}this.addTerminal(n,t),this.wordCount++}},addTerminal:function(t,n){if(n.length<=1)return void(t[n]=1);var e={};t[n[0]]=e,this.addTerminal(e,n.slice(1))},nodeProps:function(t,n){var e=[];for(var i in t)""!==i&&"_"!==i[0]&&(n&&"object"!==o(t[i])||e.push(i));return e.sort(),e},optimize:function(){this.combineSuffixNode(this.root),this.prepDFS(),this.countDegree(this.root),this.prepDFS(),this.collapseChains(this.root)},combineSuffixNode:function(t){if(t._c)return t;var n=[];this.isTerminal(t)&&n.push("!");for(var e=this.nodeProps(t),i=0;i<e.length;i++){var r=e[i];"object"===o(t[r])?(t[r]=this.combineSuffixNode(t[r]),n.push(r),n.push(t[r]._c)):n.push(r)}n=n.join("-");var s=this.suffixes[n];return s?s:(this.suffixes[n]=t,t._c=this.cNext++,t)},prepDFS:function(){this.vCur++},visited:function(t){return t._v===this.vCur||(t._v=this.vCur,!1)},countDegree:function(t){if(void 0===t._d&&(t._d=0),t._d++,!this.visited(t))for(var n=this.nodeProps(t,!0),e=0;e<n.length;e++)this.countDegree(t[n[e]])},collapseChains:function(t){var n=void 0,e=void 0,i=void 0,r=void 0;if(!this.visited(t)){for(e=this.nodeProps(t),r=0;r<e.length;r++)n=e[r],i=t[n],"object"===("undefined"==typeof i?"undefined":o(i))&&(this.collapseChains(i),void 0===i._g||1!==i._d&&1!==i._g.length||(delete t[n],n+=i._g,t[n]=i[i._g]));1!==e.length||this.isTerminal(t)||(t._g=n)}},isTerminal:function(t){return!!t[""]},uniqueNode:function(t,n,e){for(var o=this.nodeProps(e,!0),i=0;i<o.length;i++){var r=o[i];if(r===t.slice(0,r.length))return r!==n.slice(0,r.length)?e[r]:this.uniqueNode(t.slice(r.length),n.slice(r.length),e[r])}},pack:function(){return r(this)}}},{"./fns":3,"./pack":7}],7:[function(t,n,e){"use strict";var 
o=t("./histogram"),i=t("../encoding"),r={NODE_SEP:";",STRING_SEP:",",TERMINAL_PREFIX:"!",BASE:36},s=function(t,n){var e="",o="";t.isTerminal(n)&&(e+=r.TERMINAL_PREFIX);for(var s=t.nodeProps(n),u=0;u<s.length;u++){var f=s[u];if("number"!=typeof n[f])if(t.syms[n[f]._n])e+=o+f+t.syms[n[f]._n],o="";else{var c=i.toAlphaCode(n._n-n[f]._n-1+t.symCount);n[f]._g&&c.length>=n[f]._g.length&&1===n[n[f]._g]?(c=n[f]._g,e+=o+f+c,o=r.STRING_SEP):(e+=o+f+c,o="")}else e+=o+f,o=r.STRING_SEP}return e},u=function t(n,e){if(!n.visited(e))for(var o=n.nodeProps(e,!0),s=0;s<o.length;s++){var u=o[s],f=e._n-e[u]._n-1;f<r.BASE&&n.histRel.add(f),n.histAbs.add(e[u]._n,i.toAlphaCode(f).length-1),t(n,e[u])}},f=function(t){t.histAbs=t.histAbs.highest(r.BASE);var n=[];n[-1]=0;for(var e=0,o=0,s=3+i.toAlphaCode(t.nodeCount).length,u=0;u<r.BASE&&void 0!==t.histAbs[u];u++)n[u]=t.histAbs[u][1]-s-t.histRel.countOf(r.BASE-u-1)+n[u-1],n[u]>=e&&(e=n[u],o=u+1);return o},c=function t(n,e){if(void 0===e._n){for(var o=n.nodeProps(e,!0),i=0;i<o.length;i++)t(n,e[o[i]]);e._n=n.pos++,n.nodes.unshift(e)}},d=function(t){t.nodes=[],t.nodeCount=0,t.syms={},t.symCount=0,t.pos=0,t.optimize(),t.histAbs=new o,t.histRel=new o,c(t,t.root),t.nodeCount=t.nodes.length,t.prepDFS(),u(t,t.root),t.symCount=f(t);for(var n=0;n<t.symCount;n++)t.syms[t.histAbs[n][0]]=i.toAlphaCode(n);for(var e=0;e<t.nodeCount;e++)t.nodes[e]=s(t,t.nodes[e]);for(var d=t.symCount-1;d>=0;d--)t.nodes.unshift(i.toAlphaCode(d)+":"+i.toAlphaCode(t.nodeCount-t.histAbs[d][0]-1));return t.nodes.join(r.NODE_SEP)};n.exports=d},{"../encoding":1,"./histogram":4}],8:[function(t,n,e){"use strict";var o=t("./methods"),i=function(t){this.root={},this.lastWord="",this.suffixes={},this.suffixCounts={},this.cNext=1,this.wordCount=0,this.insertWords(t),this.vCur=0};Object.keys(o).forEach(function(t){i.prototype[t]=o[t]}),n.exports=i},{"./methods":6}],9:[function(t,n,e){"use strict";var o=t("./unpack");n.exports=function(t){"string"==typeof t&&(t=JSON.parse(t));var 
n={};return Object.keys(t).forEach(function(e){for(var i=o(t[e]),r=0;r<i.length;r++)n[i[r]]=e}),n}},{"./unpack":11}],10:[function(t,n,e){"use strict";var o=t("../encoding");n.exports=function(t){for(var n=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),e=0;e<t.nodes.length;e++){var i=n.exec(t.nodes[e]);if(!i){t.symCount=e;break}t.syms[o.fromAlphaCode(i[1])]=o.fromAlphaCode(i[2])}t.nodes=t.nodes.slice(t.symCount,t.nodes.length)}},{"../encoding":1}],11:[function(t,n,e){"use strict";var o=t("./symbols"),i=t("../encoding"),r=function(t,n,e){var o=i.fromAlphaCode(n);return o<t.symCount?t.syms[o]:e+o+1-t.symCount},s=function(t){var n=[],e=function e(o,i){var s=t.nodes[o];"!"===s[0]&&(n.push(i),s=s.slice(1));for(var u=s.split(/([A-Z0-9,]+)/g),f=0;f<u.length;f+=2){var c=u[f],d=u[f+1];if(c){var h=i+c;if(","!==d&&void 0!==d){var a=r(t,d,o);e(a,h)}else n.push(h)}}};return e(0,""),n},u=function(t){var n={nodes:t.split(";"),syms:[],symCount:0};return t.match(":")&&o(n),s(n)};n.exports=u},{"../encoding":1,"./symbols":10}]},{},[2])(2)});

@@ -5,3 +5,3 @@ {

"description": "compressed-trie data-structure",
"version": "0.0.7",
"version": "1.0.0",
"main": "./builds/efrt.js",

@@ -39,2 +39,2 @@ "repository": {

"license": "MIT"
}
}
<div align="center">
<img src="https://cloud.githubusercontent.com/assets/399657/23590290/ede73772-01aa-11e7-8915-181ef21027bc.png" />
<div>trie-based compression of word-data</div>
<div>compression of key-value data</div>
<a href="https://npmjs.org/package/efrt">

@@ -13,37 +13,52 @@ <img src="https://img.shields.io/npm/v/efrt.svg?style=flat-square" />

<div align="center">
<code>npm i efrt</code>
<code>npm install efrt</code>
<br/>
(or alternatively:)
<br/>
<code>npm install efrt-unpack</code>
</div>
`efrt` is a prefix/suffix [trie](https://en.wikipedia.org/wiki/Trie) optimised for compression of english words.
`efrt` turns a javascript object into a very-compressed prefix [trie](https://en.wikipedia.org/wiki/Trie) format, so that any redundancies in key-value pairs are compressed, and nothing is repeated.
it is based on [mckoss/lookups](https://github.com/mckoss/lookups) by [Mike Koss](https://github.com/mckoss)
and [bits.js](http://stevehanov.ca/blog/index.php?id=120) by [Steve Hanov](https://twitter.com/smhanov)
it is based on
[lookups](https://github.com/mckoss/lookups) by [Mike Koss](https://github.com/mckoss),
[tamper](https://nytimes.github.io/tamper/) by the [nyTimes](https://github.com/NYTimes/),
and
[bits.js](http://stevehanov.ca/blog/index.php?id=120) by [Steve Hanov](https://twitter.com/smhanov)
* squeeze a list of words into a very compact form
* squeeze a key-value object into a very compact form
* reduce filesize/bandwidth a bunch
* ensure unpacking overhead is negligible
* ensure the unpacking overhead is negligible
* word-lookups are critical-path
By doing the fancy stuff ahead-of-time, **efrt** lets you ship much bigger word-lists to the client-side, without much hassle.
By doing the fancy stuff ahead-of-time, **efrt** lets you ship much bigger key-value data to the client-side, without much hassle.
The whole library is *8kb*, the unpack-half is only *2.5kb*.
```js
var efrt = require('efrt')
var words = [
'coolage',
'cool',
'cool cat',
'cool.com',
'coolamungo'
];
var data = {
bedfordshire : 'England',
aberdeenshire : 'Scotland',
berkshire : 'England',
buckinghamshire: 'England',
argyllshire : 'Scotland',
bambridgeshire : 'England',
angus : 'Scotland',
bristol : 'England',
cheshire : 'England',
ayrshire : 'Scotland',
banffshire : 'Scotland',
berwickshire : 'Scotland'
}
//pack these words as tightly as possible
var compressed = efrt.pack(words);
//cool0;! cat,.com,a0;ge,mungo
var compressed = efrt.pack(data);
//{"England":"b0che2;ambridge1e0ristol,uckingham1;dford0rk0;shire","Scotland":"a1b0;anff1erwick1;berdeen0ngus,rgyll0yr0;shire"}
//create a lookup-trie
var trie = efrt.unpack(compressed);
var objAgain = efrt.unpack(compressed);
//hit it!
console.log(trie.has('cool'));//true
console.log(trie.has('miles davis'));//false
console.log(objAgain['bedfordshire']);//'England'
console.log(objAgain.hasOwnProperty('miles davis'));//false
```

@@ -55,26 +70,16 @@

the keys you input are pretty normalized. Spaces and unicode are good, but numbers, case-sensitivity, and *some punctuation* (semicolon, comma, exclamation-mark) are not (yet) supported.
the words you input should be pretty normalized. Spaces and unicode are good, but numbers, case-sensitivity, and [some punctuation](https://github.com/nlp-compromise/efrt/blob/master/src/config.js) are not (yet) supported.
## Performance
there are two modes that `efrt` can run in, depending on what you want to optimise for.
By itself, it will be ready-instantly, but must lookup words by their prefixes in the trie. This is not super-fast. If you want lookups to go faster, you can call `trie.cache()` first, to pre-compute the queries. Things will run much faster after this:
*efrt* is tuned to be very quick to unzip. It is O(1) to lookup. Packing-up the data is the slowest part, which is usually cool.
```js
var compressed = efrt.pack(skateboarders);//1k words (on a macbook)
var trie = efrt.unpack(compressed)
trie.has('tony hawk')
// trie-lookup: 1.1ms
// unpacking-step: 5.1ms
trie.cache()
// caching-step: 5.1ms
trie.has('tony hawk')
trie.hasOwnProperty('tony hawk')
// cached-lookup: 0.02ms
```
the `trie.cache()` command will spin the trie into a good-old javascript object, for faster lookups. It takes some time building it though.
In this example, with 1k words, it makes sense to hit `.cache()` if you are going to do more-than 5 lookups on the trie, but your mileage may vary.
You can access the object from `trie.toObject()`, or `trie.toArray()` if you'd like to use it directly.
## Size

@@ -92,2 +97,4 @@ `efrt` will pack filesize down as much as possible, depending upon the redundancy of the prefixes/suffixes in the words, and the size of the list.

Assuming your data has a low _category-to-data ratio_, you will hit break-even at about 250 keys. If your data is in the thousands, you can be very confident about saving your users some considerable bandwidth.
## Use

@@ -100,3 +107,3 @@ **IE9+**

var trie=efrt.unpack(smaller)
console.log(trie.has('moe'))
console.log(trie['moe'])
</script>

@@ -106,2 +113,5 @@ ```

if you're doing the second step in the client, you can load just the unpack-half of the library(~3k):
```bash
npm install efrt-unpack
```
```html

@@ -111,3 +121,3 @@ <script src="https://unpkg.com/efrt@latest/builds/efrt-unpack.min.js"></script>

var trie=unpack(compressedStuff);
trie.has('miles davis');
trie.hasOwnProperty('miles davis');
</script>

@@ -114,0 +124,0 @@ ```

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc