Comparing version 0.0.5 to 0.0.6
@@ -60,13 +60,101 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.unpack = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){ | ||
var unpack = function unpack(str) { | ||
module.exports = function (str) { | ||
return new Ptrie(str); | ||
}; | ||
module.exports = unpack; | ||
},{"./ptrie":4}],3:[function(_dereq_,module,exports){ | ||
},{"./ptrie":5}],3:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var encoding = _dereq_('../encoding'); | ||
var isPrefix = _dereq_('./prefix'); | ||
var unravel = _dereq_('./unravel'); | ||
var methods = { | ||
// Return largest matching string in the dictionary (or '') | ||
has: function has(want) { | ||
//fail-fast | ||
if (!want) { | ||
return false; | ||
} | ||
//then, try cache-lookup | ||
if (this._cache) { | ||
return this._cache[want] || false; | ||
} | ||
var self = this; | ||
var crawl = function crawl(index, prefix) { | ||
var node = self.nodes[index]; | ||
//the '!' means a prefix-alone is a good match | ||
if (node[0] === '!') { | ||
//try to match the prefix (the last branch) | ||
if (prefix === want) { | ||
return true; | ||
} | ||
node = node.slice(1); //ok, we tried. remove it. | ||
} | ||
//each possible match on this line is something like 'me,me2,me4'. | ||
//try each one | ||
var matches = node.split(/([A-Z0-9,]+)/g); | ||
for (var i = 0; i < matches.length; i += 2) { | ||
var str = matches[i]; | ||
var ref = matches[i + 1]; | ||
if (!str) { | ||
continue; | ||
} | ||
var have = prefix + str; | ||
//we're at the branch's end, so try to match it | ||
if (ref === ',' || ref === undefined) { | ||
if (have === want) { | ||
return true; | ||
} | ||
continue; | ||
} | ||
//ok, not a match. | ||
//well, should we keep going on this branch? | ||
//if we do, we ignore all the others here. | ||
if (isPrefix(have, want)) { | ||
index = self.indexFromRef(ref, index); | ||
return crawl(index, have); | ||
} | ||
//nah, lets try the next branch.. | ||
continue; | ||
} | ||
return false; | ||
}; | ||
return crawl(0, ''); | ||
}, | ||
// References are either absolute (symbol) or relative (1 - based) | ||
indexFromRef: function indexFromRef(ref, index) { | ||
var dnode = encoding.fromAlphaCode(ref); | ||
if (dnode < this.symCount) { | ||
return this.syms[dnode]; | ||
} | ||
return index + dnode + 1 - this.symCount; | ||
}, | ||
toArray: function toArray() { | ||
return Object.keys(this.toObject()); | ||
}, | ||
toObject: function toObject() { | ||
if (this._cache) { | ||
return this._cache; | ||
} | ||
return unravel(this); | ||
}, | ||
cache: function cache() { | ||
this._cache = unravel(this); | ||
this.nodes = null; | ||
this.syms = null; | ||
} | ||
}; | ||
module.exports = methods; | ||
},{"../encoding":1,"./prefix":4,"./unravel":7}],4:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//are we on the right path with this string? | ||
var isPrefix = function isPrefix(str, want) { | ||
module.exports = function (str, want) { | ||
//allow perfect equals | ||
@@ -87,159 +175,59 @@ if (str === want) { | ||
}; | ||
module.exports = isPrefix; | ||
// console.log(isPrefix('harvar', 'harvard')); | ||
// console.log(module.exports('harvar', 'harvard')); | ||
},{}],4:[function(_dereq_,module,exports){ | ||
},{}],5:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); | ||
var parseSymbols = _dereq_('./symbols'); | ||
var methods = _dereq_('./methods'); | ||
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } | ||
var encoding = _dereq_('../encoding'); | ||
var isPrefix = _dereq_('./prefix'); | ||
var unravel = _dereq_('./unravel'); | ||
//PackedTrie - Trie traversal of the Trie packed-string representation. | ||
var PackedTrie = function () { | ||
function PackedTrie(str) { | ||
_classCallCheck(this, PackedTrie); | ||
this.nodes = str.split(';'); //that's all ;)! | ||
this.syms = []; | ||
this.symCount = 0; | ||
this._cache = null; | ||
//process symbols, if they have them | ||
if (str.match(':')) { | ||
this.initSymbols(); | ||
} | ||
var PackedTrie = function PackedTrie(str) { | ||
this.nodes = str.split(';'); //that's all ;)! | ||
this.syms = []; | ||
this.symCount = 0; | ||
this._cache = null; | ||
//process symbols, if they have them | ||
if (str.match(':')) { | ||
parseSymbols(this); | ||
} | ||
}; | ||
//the symbols are at the top of the array. | ||
Object.keys(methods).forEach(function (k) { | ||
PackedTrie.prototype[k] = methods[k]; | ||
}); | ||
module.exports = PackedTrie; | ||
_createClass(PackedTrie, [{ | ||
key: 'initSymbols', | ||
value: function initSymbols() { | ||
//... process these lines | ||
var reSymbol = new RegExp('([0-9A-Z]+):([0-9A-Z]+)'); | ||
for (var i = 0; i < this.nodes.length; i++) { | ||
var m = reSymbol.exec(this.nodes[i]); | ||
if (!m) { | ||
this.symCount = i; | ||
break; | ||
} | ||
this.syms[encoding.fromAlphaCode(m[1])] = encoding.fromAlphaCode(m[2]); | ||
} | ||
//remove from main node list | ||
this.nodes = this.nodes.slice(this.symCount, this.nodes.length); | ||
} | ||
},{"./methods":3,"./symbols":6}],6:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
// Return largest matching string in the dictionary (or '') | ||
var encoding = _dereq_('../encoding'); | ||
}, { | ||
key: 'has', | ||
value: function has(want) { | ||
var _this = this; | ||
//fail-fast | ||
if (!want) { | ||
return false; | ||
} | ||
//then, try cache-lookup | ||
if (this._cache) { | ||
return this._cache[want] || false; | ||
} | ||
var crawl = function crawl(index, prefix) { | ||
var node = _this.nodes[index]; | ||
//the '!' means a prefix-alone is a good match | ||
if (node[0] === '!') { | ||
//try to match the prefix (the last branch) | ||
if (prefix === want) { | ||
return true; | ||
} | ||
node = node.slice(1); //ok, we tried. remove it. | ||
} | ||
//each possible match on this line is something like 'me,me2,me4'. | ||
//try each one | ||
var matches = node.split(/([A-Z0-9,]+)/g); | ||
for (var i = 0; i < matches.length; i += 2) { | ||
var str = matches[i]; | ||
var ref = matches[i + 1]; | ||
if (!str) { | ||
continue; | ||
} | ||
var have = prefix + str; | ||
//we're at the branch's end, so try to match it | ||
if (ref === ',' || ref === undefined) { | ||
if (have === want) { | ||
return true; | ||
} | ||
continue; | ||
} | ||
//ok, not a match. | ||
//well, should we keep going on this branch? | ||
//if we do, we ignore all the others here. | ||
if (isPrefix(have, want)) { | ||
index = _this.indexFromRef(ref, index); | ||
return crawl(index, have); | ||
} | ||
//nah, lets try the next branch.. | ||
continue; | ||
} | ||
return false; | ||
}; | ||
return crawl(0, ''); | ||
//the symbols are at the top of the array. | ||
module.exports = function (t) { | ||
//... process these lines | ||
var reSymbol = new RegExp('([0-9A-Z]+):([0-9A-Z]+)'); | ||
for (var i = 0; i < t.nodes.length; i++) { | ||
var m = reSymbol.exec(t.nodes[i]); | ||
if (!m) { | ||
t.symCount = i; | ||
break; | ||
} | ||
t.syms[encoding.fromAlphaCode(m[1])] = encoding.fromAlphaCode(m[2]); | ||
} | ||
//remove from main node list | ||
t.nodes = t.nodes.slice(t.symCount, t.nodes.length); | ||
}; | ||
// References are either absolute (symbol) or relative (1 - based) | ||
}, { | ||
key: 'indexFromRef', | ||
value: function indexFromRef(ref, index) { | ||
var dnode = encoding.fromAlphaCode(ref); | ||
if (dnode < this.symCount) { | ||
return this.syms[dnode]; | ||
} | ||
return index + dnode + 1 - this.symCount; | ||
} | ||
}, { | ||
key: 'toArray', | ||
value: function toArray() { | ||
return Object.keys(this.toObject()); | ||
} | ||
}, { | ||
key: 'toObject', | ||
value: function toObject() { | ||
if (this._cache) { | ||
return this._cache; | ||
} | ||
return unravel(this); | ||
} | ||
}, { | ||
key: 'cache', | ||
value: function cache() { | ||
this._cache = unravel(this); | ||
this.nodes = null; | ||
this.syms = null; | ||
} | ||
}]); | ||
return PackedTrie; | ||
}(); | ||
module.exports = PackedTrie; | ||
},{"../encoding":1,"./prefix":3,"./unravel":5}],5:[function(_dereq_,module,exports){ | ||
},{"../encoding":1}],7:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//spin-out all words from this trie | ||
var unRavel = function unRavel(trie) { | ||
module.exports = function (trie) { | ||
var all = {}; | ||
var crawl = function crawl(index, prefix) { | ||
var crawl = function crawl(index, pref) { | ||
var node = trie.nodes[index]; | ||
if (node[0] === '!') { | ||
all[prefix] = true; | ||
all[pref] = true; | ||
node = node.slice(1); //ok, we tried. remove it. | ||
@@ -255,3 +243,3 @@ } | ||
var have = prefix + str; | ||
var have = pref + str; | ||
//branch's end | ||
@@ -269,5 +257,4 @@ if (ref === ',' || ref === undefined) { | ||
}; | ||
module.exports = unRavel; | ||
},{}]},{},[2])(2) | ||
}); |
@@ -1,2 +0,2 @@ | ||
/* efrt trie-compression v0.0.5 github.com/nlp-compromise/efrt - MIT */ | ||
/* efrt trie-compression v0.0.6 github.com/nlp-compromise/efrt - MIT */ | ||
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.unpack = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){ | ||
@@ -7,3 +7,3 @@ 'use strict'; | ||
const seq = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'; | ||
const cache = seq.split('').reduce((h, c, i) => { | ||
const cache = seq.split('').reduce(function(h, c, i) { | ||
h[c] = i; | ||
@@ -63,32 +63,8 @@ return h; | ||
const unpack = (str) => { | ||
module.exports = function(str) { | ||
return new Ptrie(str); | ||
}; | ||
module.exports = unpack; | ||
},{"./ptrie":4}],3:[function(_dereq_,module,exports){ | ||
},{"./ptrie":5}],3:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//are we on the right path with this string? | ||
const isPrefix = function(str, want) { | ||
//allow perfect equals | ||
if (str === want) { | ||
return true; | ||
} | ||
//compare lengths | ||
let len = str.length; | ||
if (len >= want.length) { | ||
return false; | ||
} | ||
//quick slice | ||
if (len === 1) { | ||
return str === want[0]; | ||
} | ||
return want.slice(0, len) === str; | ||
}; | ||
module.exports = isPrefix; | ||
// console.log(isPrefix('harvar', 'harvard')); | ||
},{}],4:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
const encoding = _dereq_('../encoding'); | ||
@@ -98,33 +74,5 @@ const isPrefix = _dereq_('./prefix'); | ||
//PackedTrie - Trie traversal of the Trie packed-string representation. | ||
class PackedTrie { | ||
constructor(str) { | ||
this.nodes = str.split(';'); //that's all ;)! | ||
this.syms = []; | ||
this.symCount = 0; | ||
this._cache = null; | ||
//process symbols, if they have them | ||
if (str.match(':')) { | ||
this.initSymbols(); | ||
} | ||
} | ||
//the symbols are at the top of the array. | ||
initSymbols() { | ||
//... process these lines | ||
const reSymbol = new RegExp('([0-9A-Z]+):([0-9A-Z]+)'); | ||
for(let i = 0; i < this.nodes.length; i++) { | ||
const m = reSymbol.exec(this.nodes[i]); | ||
if (!m) { | ||
this.symCount = i; | ||
break; | ||
} | ||
this.syms[encoding.fromAlphaCode(m[1])] = encoding.fromAlphaCode(m[2]); | ||
} | ||
//remove from main node list | ||
this.nodes = this.nodes.slice(this.symCount, this.nodes.length); | ||
} | ||
const methods = { | ||
// Return largest matching string in the dictionary (or '') | ||
has(want) { | ||
has: function(want) { | ||
//fail-fast | ||
@@ -138,4 +86,5 @@ if (!want) { | ||
} | ||
const crawl = (index, prefix) => { | ||
let node = this.nodes[index]; | ||
let self = this; | ||
const crawl = function(index, prefix) { | ||
let node = self.nodes[index]; | ||
//the '!' means a prefix-alone is a good match | ||
@@ -170,3 +119,3 @@ if (node[0] === '!') { | ||
if (isPrefix(have, want)) { | ||
index = this.indexFromRef(ref, index); | ||
index = self.indexFromRef(ref, index); | ||
return crawl(index, have); | ||
@@ -181,6 +130,6 @@ } | ||
return crawl(0, ''); | ||
} | ||
}, | ||
// References are either absolute (symbol) or relative (1 - based) | ||
indexFromRef(ref, index) { | ||
indexFromRef: function(ref, index) { | ||
const dnode = encoding.fromAlphaCode(ref); | ||
@@ -191,8 +140,9 @@ if (dnode < this.symCount) { | ||
return index + dnode + 1 - this.symCount; | ||
} | ||
}, | ||
toArray() { | ||
toArray: function() { | ||
return Object.keys(this.toObject()); | ||
} | ||
toObject() { | ||
}, | ||
toObject: function() { | ||
if (this._cache) { | ||
@@ -202,4 +152,5 @@ return this._cache; | ||
return unravel(this); | ||
} | ||
cache() { | ||
}, | ||
cache: function() { | ||
this._cache = unravel(this); | ||
@@ -209,17 +160,78 @@ this.nodes = null; | ||
} | ||
}; | ||
module.exports = methods; | ||
} | ||
},{"../encoding":1,"./prefix":4,"./unravel":7}],4:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//are we on the right path with this string? | ||
module.exports = function(str, want) { | ||
//allow perfect equals | ||
if (str === want) { | ||
return true; | ||
} | ||
//compare lengths | ||
let len = str.length; | ||
if (len >= want.length) { | ||
return false; | ||
} | ||
//quick slice | ||
if (len === 1) { | ||
return str === want[0]; | ||
} | ||
return want.slice(0, len) === str; | ||
}; | ||
// console.log(module.exports('harvar', 'harvard')); | ||
},{}],5:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
const parseSymbols = _dereq_('./symbols'); | ||
const methods = _dereq_('./methods'); | ||
//PackedTrie - Trie traversal of the Trie packed-string representation. | ||
const PackedTrie = function(str) { | ||
this.nodes = str.split(';'); //that's all ;)! | ||
this.syms = []; | ||
this.symCount = 0; | ||
this._cache = null; | ||
//process symbols, if they have them | ||
if (str.match(':')) { | ||
parseSymbols(this); | ||
} | ||
}; | ||
Object.keys(methods).forEach(function(k) { | ||
PackedTrie.prototype[k] = methods[k]; | ||
}); | ||
module.exports = PackedTrie; | ||
},{"../encoding":1,"./prefix":3,"./unravel":5}],5:[function(_dereq_,module,exports){ | ||
},{"./methods":3,"./symbols":6}],6:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
const encoding = _dereq_('../encoding'); | ||
//the symbols are at the top of the array. | ||
module.exports = function(t) { | ||
//... process these lines | ||
const reSymbol = new RegExp('([0-9A-Z]+):([0-9A-Z]+)'); | ||
for(let i = 0; i < t.nodes.length; i++) { | ||
const m = reSymbol.exec(t.nodes[i]); | ||
if (!m) { | ||
t.symCount = i; | ||
break; | ||
} | ||
t.syms[encoding.fromAlphaCode(m[1])] = encoding.fromAlphaCode(m[2]); | ||
} | ||
//remove from main node list | ||
t.nodes = t.nodes.slice(t.symCount, t.nodes.length); | ||
}; | ||
},{"../encoding":1}],7:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//spin-out all words from this trie | ||
const unRavel = (trie) => { | ||
module.exports = function(trie) { | ||
let all = {}; | ||
const crawl = function(index, prefix) { | ||
const crawl = function(index, pref) { | ||
let node = trie.nodes[index]; | ||
if (node[0] === '!') { | ||
all[prefix] = true; | ||
all[pref] = true; | ||
node = node.slice(1); //ok, we tried. remove it. | ||
@@ -235,3 +247,3 @@ } | ||
let have = prefix + str; | ||
let have = pref + str; | ||
//branch's end | ||
@@ -249,5 +261,4 @@ if (ref === ',' || ref === undefined) { | ||
}; | ||
module.exports = unRavel; | ||
},{}]},{},[2])(2) | ||
}); |
@@ -1,2 +0,2 @@ | ||
/* efrt trie-compression v0.0.5 github.com/nlp-compromise/efrt - MIT */ | ||
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var n;n="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,n.unpack=e()}}(function(){return function e(n,t,r){function i(u,f){if(!t[u]){if(!n[u]){var s="function"==typeof require&&require;if(!f&&s)return s(u,!0);if(o)return o(u,!0);var a=new Error("Cannot find module '"+u+"'");throw a.code="MODULE_NOT_FOUND",a}var c=t[u]={exports:{}};n[u][0].call(c.exports,function(e){var t=n[u][1][e];return i(t?t:e)},c,c.exports,e,n,t,r)}return t[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}({1:[function(e,n,t){"use strict";var r=36,i="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",o=i.split("").reduce(function(e,n,t){return e[n]=t,e},{}),u=function(e){if(void 0!==i[e])return i[e];for(var n=1,t=r,o="";e>=t;e-=t,n++,t*=r);for(;n--;){var u=e%r;o=String.fromCharCode((u<10?48:55)+u)+o,e=(e-u)/r}return o},f=function(e){if(void 0!==o[e])return o[e];for(var n=0,t=1,i=r,u=1;t<e.length;n+=i,t++,i*=r);for(var f=e.length-1;f>=0;f--,u*=r){var s=e.charCodeAt(f)-48;s>10&&(s-=7),n+=s*u}return n};n.exports={toAlphaCode:u,fromAlphaCode:f}},{}],2:[function(e,n,t){"use strict";var r=e("./ptrie"),i=function(e){return new r(e)};n.exports=i},{"./ptrie":4}],3:[function(e,n,t){"use strict";var r=function(e,n){if(e===n)return!0;var t=e.length;return!(t>=n.length)&&(1===t?e===n[0]:n.slice(0,t)===e)};n.exports=r},{}],4:[function(e,n,t){"use strict";function r(e,n){if(!(e instanceof n))throw new TypeError("Cannot call a class as a function")}var i=function(){function e(e,n){for(var t=0;t<n.length;t++){var r=n[t];r.enumerable=r.enumerable||!1,r.configurable=!0,"value"in r&&(r.writable=!0),Object.defineProperty(e,r.key,r)}}return function(n,t,r){return t&&e(n.prototype,t),r&&e(n,r),n}}(),o=e("../encoding"),u=e("./prefix"),f=e("./unravel"),s=function(){function e(n){r(this,e),this.nodes=n.split(";"),this.syms=[],this.symCount=0,this._cache=null,n.match(":")&&this.initSymbols()}return i(e,[{key:"initSymbols",value:function(){for(var e=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),n=0;n<this.nodes.length;n++){var t=e.exec(this.nodes[n]);if(!t){this.symCount=n;break}this.syms[o.fromAlphaCode(t[1])]=o.fromAlphaCode(t[2])}this.nodes=this.nodes.slice(this.symCount,this.nodes.length)}},{key:"has",value:function(e){var n=this;if(!e)return!1;if(this._cache)return this._cache[e]||!1;var t=function t(r,i){var o=n.nodes[r];if("!"===o[0]){if(i===e)return!0;o=o.slice(1)}for(var f=o.split(/([A-Z0-9,]+)/g),s=0;s<f.length;s+=2){var a=f[s],c=f[s+1];if(a){var l=i+a;if(","!==c&&void 0!==c){if(u(l,e))return r=n.indexFromRef(c,r),t(r,l)}else if(l===e)return!0}}return!1};return t(0,"")}},{key:"indexFromRef",value:function(e,n){var t=o.fromAlphaCode(e);return t<this.symCount?this.syms[t]:n+t+1-this.symCount}},{key:"toArray",value:function(){return Object.keys(this.toObject())}},{key:"toObject",value:function(){return this._cache?this._cache:f(this)}},{key:"cache",value:function(){this._cache=f(this),this.nodes=null,this.syms=null}}]),e}();n.exports=s},{"../encoding":1,"./prefix":3,"./unravel":5}],5:[function(e,n,t){"use strict";var r=function(e){var n={},t=function t(r,i){var o=e.nodes[r];"!"===o[0]&&(n[i]=!0,o=o.slice(1));for(var u=o.split(/([A-Z0-9,]+)/g),f=0;f<u.length;f+=2){var s=u[f],a=u[f+1];if(s){var c=i+s;if(","!==a&&void 0!==a){var l=e.indexFromRef(a,r);t(l,c)}else n[c]=!0}}};return t(0,""),n};n.exports=r},{}]},{},[2])(2)}); | ||
/* efrt trie-compression v0.0.6 github.com/nlp-compromise/efrt - MIT */ | ||
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var t;t="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,t.unpack=e()}}(function(){return function e(t,n,r){function o(s,f){if(!n[s]){if(!t[s]){var u="function"==typeof require&&require;if(!f&&u)return u(s,!0);if(i)return i(s,!0);var c=new Error("Cannot find module '"+s+"'");throw c.code="MODULE_NOT_FOUND",c}var a=n[s]={exports:{}};t[s][0].call(a.exports,function(e){var n=t[s][1][e];return o(n?n:e)},a,a.exports,e,t,n,r)}return n[s].exports}for(var i="function"==typeof require&&require,s=0;s<r.length;s++)o(r[s]);return o}({1:[function(e,t,n){"use strict";var r=36,o="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",i=o.split("").reduce(function(e,t,n){return e[t]=n,e},{}),s=function(e){if(void 0!==o[e])return o[e];for(var t=1,n=r,i="";e>=n;e-=n,t++,n*=r);for(;t--;){var s=e%r;i=String.fromCharCode((s<10?48:55)+s)+i,e=(e-s)/r}return i},f=function(e){if(void 0!==i[e])return i[e];for(var t=0,n=1,o=r,s=1;n<e.length;t+=o,n++,o*=r);for(var f=e.length-1;f>=0;f--,s*=r){var u=e.charCodeAt(f)-48;u>10&&(u-=7),t+=u*s}return t};t.exports={toAlphaCode:s,fromAlphaCode:f}},{}],2:[function(e,t,n){"use strict";var r=e("./ptrie");t.exports=function(e){return new r(e)}},{"./ptrie":5}],3:[function(e,t,n){"use strict";var r=e("../encoding"),o=e("./prefix"),i=e("./unravel"),s={has:function(e){if(!e)return!1;if(this._cache)return this._cache[e]||!1;var t=this,n=function n(r,i){var s=t.nodes[r];if("!"===s[0]){if(i===e)return!0;s=s.slice(1)}for(var f=s.split(/([A-Z0-9,]+)/g),u=0;u<f.length;u+=2){var c=f[u],a=f[u+1];if(c){var d=i+c;if(","!==a&&void 0!==a){if(o(d,e))return r=t.indexFromRef(a,r),n(r,d)}else if(d===e)return!0}}return!1};return n(0,"")},indexFromRef:function(e,t){var n=r.fromAlphaCode(e);return n<this.symCount?this.syms[n]:t+n+1-this.symCount},toArray:function(){return Object.keys(this.toObject())},toObject:function(){return this._cache?this._cache:i(this)},cache:function(){this._cache=i(this),this.nodes=null,this.syms=null}};t.exports=s},{"../encoding":1,"./prefix":4,"./unravel":7}],4:[function(e,t,n){"use strict";t.exports=function(e,t){if(e===t)return!0;var n=e.length;return!(n>=t.length)&&(1===n?e===t[0]:t.slice(0,n)===e)}},{}],5:[function(e,t,n){"use strict";var r=e("./symbols"),o=e("./methods"),i=function(e){this.nodes=e.split(";"),this.syms=[],this.symCount=0,this._cache=null,e.match(":")&&r(this)};Object.keys(o).forEach(function(e){i.prototype[e]=o[e]}),t.exports=i},{"./methods":3,"./symbols":6}],6:[function(e,t,n){"use strict";var r=e("../encoding");t.exports=function(e){for(var t=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),n=0;n<e.nodes.length;n++){var o=t.exec(e.nodes[n]);if(!o){e.symCount=n;break}e.syms[r.fromAlphaCode(o[1])]=r.fromAlphaCode(o[2])}e.nodes=e.nodes.slice(e.symCount,e.nodes.length)}},{"../encoding":1}],7:[function(e,t,n){"use strict";t.exports=function(e){var t={},n=function n(r,o){var i=e.nodes[r];"!"===i[0]&&(t[o]=!0,i=i.slice(1));for(var s=i.split(/([A-Z0-9,]+)/g),f=0;f<s.length;f+=2){var u=s[f],c=s[f+1];if(u){var a=o+u;if(","!==c&&void 0!==c){var d=e.indexFromRef(c,r);n(d,a)}else t[a]=!0}}};return n(0,""),t}},{}]},{},[2])(2)}); |
@@ -1,2 +0,2 @@ | ||
/* efrt trie-compression v0.0.5 github.com/nlp-compromise/efrt - MIT */ | ||
/* efrt trie-compression v0.0.6 github.com/nlp-compromise/efrt - MIT */ | ||
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.efrt = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){ | ||
@@ -69,5 +69,6 @@ 'use strict'; | ||
},{}],3:[function(_dereq_,module,exports){ | ||
(function (global){ | ||
'use strict'; | ||
module.exports = { | ||
var efrt = { | ||
pack: _dereq_('./pack/index'), | ||
@@ -77,3 +78,21 @@ unpack: _dereq_('./unpack/index') | ||
},{"./pack/index":6,"./unpack/index":9}],4:[function(_dereq_,module,exports){ | ||
//and then all-the-exports... | ||
if (typeof self !== 'undefined') { | ||
self.efrt = efrt; // Web Worker | ||
} else if (typeof window !== 'undefined') { | ||
window.efrt = efrt; // Browser | ||
} else if (typeof global !== 'undefined') { | ||
global.efrt = efrt; // NodeJS | ||
} | ||
//don't forget amd! | ||
if (typeof define === 'function' && define.amd) { | ||
define(efrt); | ||
} | ||
//then for some reason, do this too! | ||
if (typeof module !== 'undefined') { | ||
module.exports = efrt; | ||
} | ||
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) | ||
},{"./pack/index":6,"./unpack/index":10}],4:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
@@ -189,5 +208,271 @@ | ||
},{"./trie":8}],7:[function(_dereq_,module,exports){ | ||
},{"./trie":9}],7:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; | ||
var fns = _dereq_('./fns'); | ||
var _pack = _dereq_('./pack'); | ||
var config = _dereq_('../config'); | ||
module.exports = { | ||
// Insert words from one big string, or from an array. | ||
insertWords: function insertWords(words) { | ||
if (words === undefined) { | ||
return; | ||
} | ||
if (typeof words === 'string') { | ||
words = words.split(/[^a-zA-Z]+/); | ||
} | ||
for (var i = 0; i < words.length; i++) { | ||
words[i] = words[i].toLowerCase(); | ||
} | ||
fns.unique(words); | ||
for (var _i = 0; _i < words.length; _i++) { | ||
if (words[_i].match(config.NOT_ALLOWED) === null) { | ||
this.insert(words[_i]); | ||
} | ||
} | ||
}, | ||
insert: function insert(word) { | ||
this._insert(word, this.root); | ||
var lastWord = this.lastWord; | ||
this.lastWord = word; | ||
var prefix = fns.commonPrefix(word, lastWord); | ||
if (prefix === lastWord) { | ||
return; | ||
} | ||
var freeze = this.uniqueNode(lastWord, word, this.root); | ||
if (freeze) { | ||
this.combineSuffixNode(freeze); | ||
} | ||
}, | ||
_insert: function _insert(word, node) { | ||
var prefix = void 0, | ||
next = void 0; | ||
// Duplicate word entry - ignore | ||
if (word.length === 0) { | ||
return; | ||
} | ||
// Do any existing props share a common prefix? | ||
var keys = Object.keys(node); | ||
for (var i = 0; i < keys.length; i++) { | ||
var prop = keys[i]; | ||
prefix = fns.commonPrefix(word, prop); | ||
if (prefix.length === 0) { | ||
continue; | ||
} | ||
// Prop is a proper prefix - recurse to child node | ||
if (prop === prefix && _typeof(node[prop]) === 'object') { | ||
this._insert(word.slice(prefix.length), node[prop]); | ||
return; | ||
} | ||
// Duplicate terminal string - ignore | ||
if (prop === word && typeof node[prop] === 'number') { | ||
return; | ||
} | ||
next = {}; | ||
next[prop.slice(prefix.length)] = node[prop]; | ||
this.addTerminal(next, word = word.slice(prefix.length)); | ||
delete node[prop]; | ||
node[prefix] = next; | ||
this.wordCount++; | ||
return; | ||
} | ||
// No shared prefix. Enter the word here as a terminal string. | ||
this.addTerminal(node, word); | ||
this.wordCount++; | ||
}, | ||
// Add a terminal string to node. | ||
// If 2 characters or less, just add with value == 1. | ||
// If more than 2 characters, point to shared node | ||
// Note - don't prematurely share suffixes - these | ||
// terminals may become split and joined with other | ||
// nodes in this part of the tree. | ||
addTerminal: function addTerminal(node, prop) { | ||
if (prop.length <= 1) { | ||
node[prop] = 1; | ||
return; | ||
} | ||
var next = {}; | ||
node[prop[0]] = next; | ||
this.addTerminal(next, prop.slice(1)); | ||
}, | ||
// Well ordered list of properties in a node (string or object properties) | ||
// Use nodesOnly==true to return only properties of child nodes (not | ||
// terminal strings. | ||
nodeProps: function nodeProps(node, nodesOnly) { | ||
var props = []; | ||
for (var prop in node) { | ||
if (prop !== '' && prop[0] !== '_') { | ||
if (!nodesOnly || _typeof(node[prop]) === 'object') { | ||
props.push(prop); | ||
} | ||
} | ||
} | ||
props.sort(); | ||
return props; | ||
}, | ||
optimize: function optimize() { | ||
this.combineSuffixNode(this.root); | ||
this.prepDFS(); | ||
this.countDegree(this.root); | ||
this.prepDFS(); | ||
this.collapseChains(this.root); | ||
}, | ||
// Convert Trie to a DAWG by sharing identical nodes | ||
combineSuffixNode: function combineSuffixNode(node) { | ||
// Frozen node - can't change. | ||
if (node._c) { | ||
return node; | ||
} | ||
// Make sure all children are combined and generate unique node | ||
// signature for this node. | ||
var sig = []; | ||
if (this.isTerminal(node)) { | ||
sig.push('!'); | ||
} | ||
var props = this.nodeProps(node); | ||
for (var i = 0; i < props.length; i++) { | ||
var prop = props[i]; | ||
if (_typeof(node[prop]) === 'object') { | ||
node[prop] = this.combineSuffixNode(node[prop]); | ||
sig.push(prop); | ||
sig.push(node[prop]._c); | ||
} else { | ||
sig.push(prop); | ||
} | ||
} | ||
sig = sig.join('-'); | ||
var shared = this.suffixes[sig]; | ||
if (shared) { | ||
return shared; | ||
} | ||
this.suffixes[sig] = node; | ||
node._c = this.cNext++; | ||
return node; | ||
}, | ||
prepDFS: function prepDFS() { | ||
this.vCur++; | ||
}, | ||
visited: function visited(node) { | ||
if (node._v === this.vCur) { | ||
return true; | ||
} | ||
node._v = this.vCur; | ||
return false; | ||
}, | ||
countDegree: function countDegree(node) { | ||
if (node._d === undefined) { | ||
node._d = 0; | ||
} | ||
node._d++; | ||
if (this.visited(node)) { | ||
return; | ||
} | ||
var props = this.nodeProps(node, true); | ||
for (var i = 0; i < props.length; i++) { | ||
this.countDegree(node[props[i]]); | ||
} | ||
}, | ||
// Remove intermediate singleton nodes by hoisting into their parent | ||
collapseChains: function collapseChains(node) { | ||
var prop = void 0, | ||
props = void 0, | ||
child = void 0, | ||
i = void 0; | ||
if (this.visited(node)) { | ||
return; | ||
} | ||
props = this.nodeProps(node); | ||
for (i = 0; i < props.length; i++) { | ||
prop = props[i]; | ||
child = node[prop]; | ||
if ((typeof child === 'undefined' ? 'undefined' : _typeof(child)) !== 'object') { | ||
continue; | ||
} | ||
this.collapseChains(child); | ||
// Hoist the singleton child's single property to the parent | ||
if (child._g !== undefined && (child._d === 1 || child._g.length === 1)) { | ||
delete node[prop]; | ||
prop += child._g; | ||
node[prop] = child[child._g]; | ||
} | ||
} | ||
// Identify singleton nodes | ||
if (props.length === 1 && !this.isTerminal(node)) { | ||
node._g = prop; | ||
} | ||
}, | ||
has: function has(word) { | ||
return this.isFragment(word, this.root); | ||
}, | ||
isTerminal: function isTerminal(node) { | ||
return !!node['']; | ||
}, | ||
isFragment: function isFragment(word, node) { | ||
if (word.length === 0) { | ||
return this.isTerminal(node); | ||
} | ||
if (node[word] === 1) { | ||
return true; | ||
} | ||
// Find a prefix of word reference to a child | ||
var props = this.nodeProps(node, true); | ||
for (var i = 0; i < props.length; i++) { | ||
var prop = props[i]; | ||
if (prop === word.slice(0, prop.length)) { | ||
return this.isFragment(word.slice(prop.length), node[prop]); | ||
} | ||
} | ||
return false; | ||
}, | ||
// Find highest node in Trie that is on the path to word | ||
// and that is NOT on the path to other. | ||
uniqueNode: function uniqueNode(word, other, node) { | ||
var props = this.nodeProps(node, true); | ||
for (var i = 0; i < props.length; i++) { | ||
var prop = props[i]; | ||
if (prop === word.slice(0, prop.length)) { | ||
if (prop !== other.slice(0, prop.length)) { | ||
return node[prop]; | ||
} | ||
return this.uniqueNode(word.slice(prop.length), other.slice(prop.length), node[prop]); | ||
} | ||
} | ||
return undefined; | ||
}, | ||
pack: function pack() { | ||
return _pack(this); | ||
} | ||
}; | ||
},{"../config":1,"./fns":4,"./pack":8}],8:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var Histogram = _dereq_('./histogram'); | ||
@@ -350,34 +635,9 @@ var config = _dereq_('../config'); | ||
},{"../config":1,"../encoding":2,"./histogram":5}],8:[function(_dereq_,module,exports){ | ||
},{"../config":1,"../encoding":2,"./histogram":5}],9:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; | ||
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); | ||
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } | ||
var fns = _dereq_('./fns'); | ||
var _pack = _dereq_('./pack'); | ||
var config = _dereq_('../config'); | ||
// const pack = require('./packer'); | ||
var methods = _dereq_('./methods'); | ||
/* | ||
org.startpad.trie - A JavaScript implementation of a Trie search datastructure. | ||
Usage: | ||
trie = new Trie(dictionary-string); | ||
bool = trie.has(word); | ||
To use a packed (compressed) version of the trie stored as a string: | ||
compressed = trie.pack(); | ||
ptrie = new PackedTrie(compressed); | ||
bool = ptrie.has(word) | ||
Node structure: | ||
Each node of the Trie is an Object that can contain the following properties: | ||
A JavaScript implementation of a Trie search datastructure. | ||
Each node of the Trie is an Object that can contain the following properties: | ||
'' - If present (with value == 1), the node is a Terminal Node - the prefix | ||
@@ -397,10 +657,3 @@ leading to this node is a word in the dictionary. | ||
*/ | ||
// Create a Trie data structure for searching for membership of strings | ||
// in a dictionary in a very space efficient way. | ||
var Trie = function () { | ||
function Trie(words) { | ||
_classCallCheck(this, Trie); | ||
var Trie = function Trie(words) { | ||
this.root = {}; | ||
@@ -414,309 +667,112 @@ this.lastWord = ''; | ||
this.vCur = 0; | ||
} | ||
}; | ||
Object.keys(methods).forEach(function (k) { | ||
Trie.prototype[k] = methods[k]; | ||
}); | ||
module.exports = Trie; | ||
// Insert words from one big string, or from an array. | ||
},{"./methods":7}],10:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var Ptrie = _dereq_('./ptrie'); | ||
_createClass(Trie, [{ | ||
key: 'insertWords', | ||
value: function insertWords(words) { | ||
if (words === undefined) { | ||
return; | ||
} | ||
if (typeof words === 'string') { | ||
words = words.split(/[^a-zA-Z]+/); | ||
} | ||
for (var i = 0; i < words.length; i++) { | ||
words[i] = words[i].toLowerCase(); | ||
} | ||
fns.unique(words); | ||
for (var _i = 0; _i < words.length; _i++) { | ||
if (words[_i].match(config.NOT_ALLOWED) === null) { | ||
this.insert(words[_i]); | ||
} | ||
} | ||
} | ||
}, { | ||
key: 'insert', | ||
value: function insert(word) { | ||
this._insert(word, this.root); | ||
var lastWord = this.lastWord; | ||
this.lastWord = word; | ||
module.exports = function (str) { | ||
return new Ptrie(str); | ||
}; | ||
var prefix = fns.commonPrefix(word, lastWord); | ||
if (prefix === lastWord) { | ||
return; | ||
} | ||
},{"./ptrie":13}],11:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var freeze = this.uniqueNode(lastWord, word, this.root); | ||
if (freeze) { | ||
this.combineSuffixNode(freeze); | ||
} | ||
} | ||
}, { | ||
key: '_insert', | ||
value: function _insert(word, node) { | ||
var prefix = void 0, | ||
next = void 0; | ||
var encoding = _dereq_('../encoding'); | ||
var isPrefix = _dereq_('./prefix'); | ||
var unravel = _dereq_('./unravel'); | ||
// Duplicate word entry - ignore | ||
if (word.length === 0) { | ||
return; | ||
} | ||
// Do any existing props share a common prefix? | ||
var keys = Object.keys(node); | ||
for (var i = 0; i < keys.length; i++) { | ||
var prop = keys[i]; | ||
prefix = fns.commonPrefix(word, prop); | ||
if (prefix.length === 0) { | ||
continue; | ||
} | ||
// Prop is a proper prefix - recurse to child node | ||
if (prop === prefix && _typeof(node[prop]) === 'object') { | ||
this._insert(word.slice(prefix.length), node[prop]); | ||
return; | ||
} | ||
// Duplicate terminal string - ignore | ||
if (prop === word && typeof node[prop] === 'number') { | ||
return; | ||
} | ||
next = {}; | ||
next[prop.slice(prefix.length)] = node[prop]; | ||
this.addTerminal(next, word = word.slice(prefix.length)); | ||
delete node[prop]; | ||
node[prefix] = next; | ||
this.wordCount++; | ||
return; | ||
} | ||
// No shared prefix. Enter the word here as a terminal string. | ||
this.addTerminal(node, word); | ||
this.wordCount++; | ||
var methods = { | ||
// Return largest matching string in the dictionary (or '') | ||
has: function has(want) { | ||
//fail-fast | ||
if (!want) { | ||
return false; | ||
} | ||
// Add a terminal string to node. | ||
// If 2 characters or less, just add with value == 1. | ||
// If more than 2 characters, point to shared node | ||
// Note - don't prematurely share suffixes - these | ||
// terminals may become split and joined with other | ||
// nodes in this part of the tree. | ||
}, { | ||
key: 'addTerminal', | ||
value: function addTerminal(node, prop) { | ||
if (prop.length <= 1) { | ||
node[prop] = 1; | ||
return; | ||
} | ||
var next = {}; | ||
node[prop[0]] = next; | ||
this.addTerminal(next, prop.slice(1)); | ||
//then, try cache-lookup | ||
if (this._cache) { | ||
return this._cache[want] || false; | ||
} | ||
// Well ordered list of properties in a node (string or object properties) | ||
// Use nodesOnly==true to return only properties of child nodes (not | ||
// terminal strings. | ||
}, { | ||
key: 'nodeProps', | ||
value: function nodeProps(node, nodesOnly) { | ||
var props = []; | ||
for (var prop in node) { | ||
if (prop !== '' && prop[0] !== '_') { | ||
if (!nodesOnly || _typeof(node[prop]) === 'object') { | ||
props.push(prop); | ||
} | ||
var self = this; | ||
var crawl = function crawl(index, prefix) { | ||
var node = self.nodes[index]; | ||
//the '!' means a prefix-alone is a good match | ||
if (node[0] === '!') { | ||
//try to match the prefix (the last branch) | ||
if (prefix === want) { | ||
return true; | ||
} | ||
node = node.slice(1); //ok, we tried. remove it. | ||
} | ||
props.sort(); | ||
return props; | ||
} | ||
}, { | ||
key: 'optimize', | ||
value: function optimize() { | ||
this.combineSuffixNode(this.root); | ||
this.prepDFS(); | ||
this.countDegree(this.root); | ||
this.prepDFS(); | ||
this.collapseChains(this.root); | ||
} | ||
// Convert Trie to a DAWG by sharing identical nodes | ||
}, { | ||
key: 'combineSuffixNode', | ||
value: function combineSuffixNode(node) { | ||
// Frozen node - can't change. | ||
if (node._c) { | ||
return node; | ||
} | ||
// Make sure all children are combined and generate unique node | ||
// signature for this node. | ||
var sig = []; | ||
if (this.isTerminal(node)) { | ||
sig.push('!'); | ||
} | ||
var props = this.nodeProps(node); | ||
for (var i = 0; i < props.length; i++) { | ||
var prop = props[i]; | ||
if (_typeof(node[prop]) === 'object') { | ||
node[prop] = this.combineSuffixNode(node[prop]); | ||
sig.push(prop); | ||
sig.push(node[prop]._c); | ||
} else { | ||
sig.push(prop); | ||
//each possible match on this line is something like 'me,me2,me4'. | ||
//try each one | ||
var matches = node.split(/([A-Z0-9,]+)/g); | ||
for (var i = 0; i < matches.length; i += 2) { | ||
var str = matches[i]; | ||
var ref = matches[i + 1]; | ||
if (!str) { | ||
continue; | ||
} | ||
} | ||
sig = sig.join('-'); | ||
var shared = this.suffixes[sig]; | ||
if (shared) { | ||
return shared; | ||
} | ||
this.suffixes[sig] = node; | ||
node._c = this.cNext++; | ||
return node; | ||
} | ||
}, { | ||
key: 'prepDFS', | ||
value: function prepDFS() { | ||
this.vCur++; | ||
} | ||
}, { | ||
key: 'visited', | ||
value: function visited(node) { | ||
if (node._v === this.vCur) { | ||
return true; | ||
} | ||
node._v = this.vCur; | ||
return false; | ||
} | ||
}, { | ||
key: 'countDegree', | ||
value: function countDegree(node) { | ||
if (node._d === undefined) { | ||
node._d = 0; | ||
} | ||
node._d++; | ||
if (this.visited(node)) { | ||
return; | ||
} | ||
var props = this.nodeProps(node, true); | ||
for (var i = 0; i < props.length; i++) { | ||
this.countDegree(node[props[i]]); | ||
} | ||
} | ||
// Remove intermediate singleton nodes by hoisting into their parent | ||
}, { | ||
key: 'collapseChains', | ||
value: function collapseChains(node) { | ||
var prop = void 0, | ||
props = void 0, | ||
child = void 0, | ||
i = void 0; | ||
if (this.visited(node)) { | ||
return; | ||
} | ||
props = this.nodeProps(node); | ||
for (i = 0; i < props.length; i++) { | ||
prop = props[i]; | ||
child = node[prop]; | ||
if ((typeof child === 'undefined' ? 'undefined' : _typeof(child)) !== 'object') { | ||
var have = prefix + str; | ||
//we're at the branch's end, so try to match it | ||
if (ref === ',' || ref === undefined) { | ||
if (have === want) { | ||
return true; | ||
} | ||
continue; | ||
} | ||
this.collapseChains(child); | ||
// Hoist the singleton child's single property to the parent | ||
if (child._g !== undefined && (child._d === 1 || child._g.length === 1)) { | ||
delete node[prop]; | ||
prop += child._g; | ||
node[prop] = child[child._g]; | ||
//ok, not a match. | ||
//well, should we keep going on this branch? | ||
//if we do, we ignore all the others here. | ||
if (isPrefix(have, want)) { | ||
index = self.indexFromRef(ref, index); | ||
return crawl(index, have); | ||
} | ||
//nah, lets try the next branch.. | ||
continue; | ||
} | ||
// Identify singleton nodes | ||
if (props.length === 1 && !this.isTerminal(node)) { | ||
node._g = prop; | ||
} | ||
} | ||
}, { | ||
key: 'has', | ||
value: function has(word) { | ||
return this.isFragment(word, this.root); | ||
} | ||
}, { | ||
key: 'isTerminal', | ||
value: function isTerminal(node) { | ||
return !!node['']; | ||
} | ||
}, { | ||
key: 'isFragment', | ||
value: function isFragment(word, node) { | ||
if (word.length === 0) { | ||
return this.isTerminal(node); | ||
} | ||
if (node[word] === 1) { | ||
return true; | ||
} | ||
return false; | ||
}; | ||
return crawl(0, ''); | ||
}, | ||
// Find a prefix of word reference to a child | ||
var props = this.nodeProps(node, true); | ||
for (var i = 0; i < props.length; i++) { | ||
var prop = props[i]; | ||
if (prop === word.slice(0, prop.length)) { | ||
return this.isFragment(word.slice(prop.length), node[prop]); | ||
} | ||
} | ||
return false; | ||
// References are either absolute (symbol) or relative (1 - based) | ||
indexFromRef: function indexFromRef(ref, index) { | ||
var dnode = encoding.fromAlphaCode(ref); | ||
if (dnode < this.symCount) { | ||
return this.syms[dnode]; | ||
} | ||
return index + dnode + 1 - this.symCount; | ||
}, | ||
// Find highest node in Trie that is on the path to word | ||
// and that is NOT on the path to other. | ||
toArray: function toArray() { | ||
return Object.keys(this.toObject()); | ||
}, | ||
}, { | ||
key: 'uniqueNode', | ||
value: function uniqueNode(word, other, node) { | ||
var props = this.nodeProps(node, true); | ||
for (var i = 0; i < props.length; i++) { | ||
var prop = props[i]; | ||
if (prop === word.slice(0, prop.length)) { | ||
if (prop !== other.slice(0, prop.length)) { | ||
return node[prop]; | ||
} | ||
return this.uniqueNode(word.slice(prop.length), other.slice(prop.length), node[prop]); | ||
} | ||
} | ||
return undefined; | ||
toObject: function toObject() { | ||
if (this._cache) { | ||
return this._cache; | ||
} | ||
}, { | ||
key: 'pack', | ||
value: function pack() { | ||
return _pack(this); | ||
} | ||
}]); | ||
return unravel(this); | ||
}, | ||
return Trie; | ||
}(); | ||
module.exports = Trie; | ||
},{"../config":1,"./fns":4,"./pack":7}],9:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var Ptrie = _dereq_('./ptrie'); | ||
var unpack = function unpack(str) { | ||
return new Ptrie(str); | ||
cache: function cache() { | ||
this._cache = unravel(this); | ||
this.nodes = null; | ||
this.syms = null; | ||
} | ||
}; | ||
module.exports = unpack; | ||
module.exports = methods; | ||
},{"./ptrie":11}],10:[function(_dereq_,module,exports){ | ||
},{"../encoding":2,"./prefix":12,"./unravel":15}],12:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//are we on the right path with this string? | ||
var isPrefix = function isPrefix(str, want) { | ||
module.exports = function (str, want) { | ||
//allow perfect equals | ||
@@ -737,159 +793,59 @@ if (str === want) { | ||
}; | ||
module.exports = isPrefix; | ||
// console.log(isPrefix('harvar', 'harvard')); | ||
// console.log(module.exports('harvar', 'harvard')); | ||
},{}],11:[function(_dereq_,module,exports){ | ||
},{}],13:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); | ||
var parseSymbols = _dereq_('./symbols'); | ||
var methods = _dereq_('./methods'); | ||
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } | ||
var encoding = _dereq_('../encoding'); | ||
var isPrefix = _dereq_('./prefix'); | ||
var unravel = _dereq_('./unravel'); | ||
//PackedTrie - Trie traversal of the Trie packed-string representation. | ||
var PackedTrie = function () { | ||
function PackedTrie(str) { | ||
_classCallCheck(this, PackedTrie); | ||
this.nodes = str.split(';'); //that's all ;)! | ||
this.syms = []; | ||
this.symCount = 0; | ||
this._cache = null; | ||
//process symbols, if they have them | ||
if (str.match(':')) { | ||
this.initSymbols(); | ||
} | ||
var PackedTrie = function PackedTrie(str) { | ||
this.nodes = str.split(';'); //that's all ;)! | ||
this.syms = []; | ||
this.symCount = 0; | ||
this._cache = null; | ||
//process symbols, if they have them | ||
if (str.match(':')) { | ||
parseSymbols(this); | ||
} | ||
}; | ||
//the symbols are at the top of the array. | ||
Object.keys(methods).forEach(function (k) { | ||
PackedTrie.prototype[k] = methods[k]; | ||
}); | ||
module.exports = PackedTrie; | ||
_createClass(PackedTrie, [{ | ||
key: 'initSymbols', | ||
value: function initSymbols() { | ||
//... process these lines | ||
var reSymbol = new RegExp('([0-9A-Z]+):([0-9A-Z]+)'); | ||
for (var i = 0; i < this.nodes.length; i++) { | ||
var m = reSymbol.exec(this.nodes[i]); | ||
if (!m) { | ||
this.symCount = i; | ||
break; | ||
} | ||
this.syms[encoding.fromAlphaCode(m[1])] = encoding.fromAlphaCode(m[2]); | ||
} | ||
//remove from main node list | ||
this.nodes = this.nodes.slice(this.symCount, this.nodes.length); | ||
} | ||
},{"./methods":11,"./symbols":14}],14:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
// Return largest matching string in the dictionary (or '') | ||
var encoding = _dereq_('../encoding'); | ||
}, { | ||
key: 'has', | ||
value: function has(want) { | ||
var _this = this; | ||
//fail-fast | ||
if (!want) { | ||
return false; | ||
} | ||
//then, try cache-lookup | ||
if (this._cache) { | ||
return this._cache[want] || false; | ||
} | ||
var crawl = function crawl(index, prefix) { | ||
var node = _this.nodes[index]; | ||
//the '!' means a prefix-alone is a good match | ||
if (node[0] === '!') { | ||
//try to match the prefix (the last branch) | ||
if (prefix === want) { | ||
return true; | ||
} | ||
node = node.slice(1); //ok, we tried. remove it. | ||
} | ||
//each possible match on this line is something like 'me,me2,me4'. | ||
//try each one | ||
var matches = node.split(/([A-Z0-9,]+)/g); | ||
for (var i = 0; i < matches.length; i += 2) { | ||
var str = matches[i]; | ||
var ref = matches[i + 1]; | ||
if (!str) { | ||
continue; | ||
} | ||
var have = prefix + str; | ||
//we're at the branch's end, so try to match it | ||
if (ref === ',' || ref === undefined) { | ||
if (have === want) { | ||
return true; | ||
} | ||
continue; | ||
} | ||
//ok, not a match. | ||
//well, should we keep going on this branch? | ||
//if we do, we ignore all the others here. | ||
if (isPrefix(have, want)) { | ||
index = _this.indexFromRef(ref, index); | ||
return crawl(index, have); | ||
} | ||
//nah, lets try the next branch.. | ||
continue; | ||
} | ||
return false; | ||
}; | ||
return crawl(0, ''); | ||
//the symbols are at the top of the array. | ||
module.exports = function (t) { | ||
//... process these lines | ||
var reSymbol = new RegExp('([0-9A-Z]+):([0-9A-Z]+)'); | ||
for (var i = 0; i < t.nodes.length; i++) { | ||
var m = reSymbol.exec(t.nodes[i]); | ||
if (!m) { | ||
t.symCount = i; | ||
break; | ||
} | ||
t.syms[encoding.fromAlphaCode(m[1])] = encoding.fromAlphaCode(m[2]); | ||
} | ||
//remove from main node list | ||
t.nodes = t.nodes.slice(t.symCount, t.nodes.length); | ||
}; | ||
// References are either absolute (symbol) or relative (1 - based) | ||
}, { | ||
key: 'indexFromRef', | ||
value: function indexFromRef(ref, index) { | ||
var dnode = encoding.fromAlphaCode(ref); | ||
if (dnode < this.symCount) { | ||
return this.syms[dnode]; | ||
} | ||
return index + dnode + 1 - this.symCount; | ||
} | ||
}, { | ||
key: 'toArray', | ||
value: function toArray() { | ||
return Object.keys(this.toObject()); | ||
} | ||
}, { | ||
key: 'toObject', | ||
value: function toObject() { | ||
if (this._cache) { | ||
return this._cache; | ||
} | ||
return unravel(this); | ||
} | ||
}, { | ||
key: 'cache', | ||
value: function cache() { | ||
this._cache = unravel(this); | ||
this.nodes = null; | ||
this.syms = null; | ||
} | ||
}]); | ||
return PackedTrie; | ||
}(); | ||
module.exports = PackedTrie; | ||
},{"../encoding":2,"./prefix":10,"./unravel":12}],12:[function(_dereq_,module,exports){ | ||
},{"../encoding":2}],15:[function(_dereq_,module,exports){ | ||
'use strict'; | ||
//spin-out all words from this trie | ||
var unRavel = function unRavel(trie) { | ||
module.exports = function (trie) { | ||
var all = {}; | ||
var crawl = function crawl(index, prefix) { | ||
var crawl = function crawl(index, pref) { | ||
var node = trie.nodes[index]; | ||
if (node[0] === '!') { | ||
all[prefix] = true; | ||
all[pref] = true; | ||
node = node.slice(1); //ok, we tried. remove it. | ||
@@ -905,3 +861,3 @@ } | ||
var have = prefix + str; | ||
var have = pref + str; | ||
//branch's end | ||
@@ -919,5 +875,4 @@ if (ref === ',' || ref === undefined) { | ||
}; | ||
module.exports = unRavel; | ||
},{}]},{},[3])(3) | ||
}); |
@@ -1,2 +0,2 @@ | ||
/* efrt trie-compression v0.0.5 github.com/nlp-compromise/efrt - MIT */ | ||
!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{var e;e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,e.efrt=t()}}(function(){return function t(e,n,i){function r(s,u){if(!n[s]){if(!e[s]){var f="function"==typeof require&&require;if(!u&&f)return f(s,!0);if(o)return o(s,!0);var a=new Error("Cannot find module '"+s+"'");throw a.code="MODULE_NOT_FOUND",a}var c=n[s]={exports:{}};e[s][0].call(c.exports,function(t){var n=e[s][1][t];return r(n?n:t)},c,c.exports,t,e,n,i)}return n[s].exports}for(var o="function"==typeof require&&require,s=0;s<i.length;s++)r(i[s]);return r}({1:[function(t,e,n){"use strict";e.exports={NODE_SEP:";",STRING_SEP:",",TERMINAL_PREFIX:"!",NOT_ALLOWED:new RegExp("[0-9A-Z,;!]"),BASE:36}},{}],2:[function(t,e,n){"use strict";var i=36,r="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",o=r.split("").reduce(function(t,e,n){return t[e]=n,t},{}),s=function(t){if(void 0!==r[t])return r[t];for(var e=1,n=i,o="";t>=n;t-=n,e++,n*=i);for(;e--;){var s=t%i;o=String.fromCharCode((s<10?48:55)+s)+o,t=(t-s)/i}return o},u=function(t){if(void 0!==o[t])return o[t];for(var e=0,n=1,r=i,s=1;n<t.length;e+=r,n++,r*=i);for(var u=t.length-1;u>=0;u--,s*=i){var f=t.charCodeAt(u)-48;f>10&&(f-=7),e+=f*s}return e};e.exports={toAlphaCode:s,fromAlphaCode:u}},{}],3:[function(t,e,n){"use strict";e.exports={pack:t("./pack/index"),unpack:t("./unpack/index")}},{"./pack/index":6,"./unpack/index":9}],4:[function(t,e,n){"use strict";var i=function(t,e){for(var n=Math.min(t.length,e.length);n>0;){var i=t.slice(0,n);if(i===e.slice(0,n))return i;n-=1}return""},r=function(t){t.sort();for(var e=1;e<t.length;e++)t[e-1]===t[e]&&t.splice(e,1)};e.exports={commonPrefix:i,unique:r}},{}],5:[function(t,e,n){"use strict";function i(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}var r=function(){function t(t,e){for(var n=0;n<e.length;n++){var i=e[n];i.enumerable=i.enumerable||!1,i.configurable=!0,"value"in i&&(i.writable=!0),Object.defineProperty(t,i.key,i)}}return function(e,n,i){return n&&t(e.prototype,n),i&&t(e,i),e}}(),o=function(){function t(){i(this,t),this.counts={}}return r(t,[{key:"init",value:function(t){void 0===this.counts[t]&&(this.counts[t]=0)}},{key:"add",value:function(t,e){void 0===e&&(e=1),this.init(t),this.counts[t]+=e}},{key:"change",value:function(t,e,n){void 0===n&&(n=1),this.add(e,-n),this.add(t,n)}},{key:"countOf",value:function(t){return this.init(t),this.counts[t]}},{key:"highest",value:function(t){for(var e=[],n=Object.keys(this.counts),i=0;i<n.length;i++){var r=n[i];e.push([r,this.counts[r]])}return e.sort(function(t,e){return e[1]-t[1]}),t&&(e=e.slice(0,t)),e}}]),t}();e.exports=o},{}],6:[function(t,e,n){"use strict";var i=t("./trie"),r=function(t){var e=new i(t);return e.pack()};e.exports=r},{"./trie":8}],7:[function(t,e,n){"use strict";var i=t("./histogram"),r=t("../config"),o=t("../encoding"),s=function(t,e){var n="",i="";t.isTerminal(e)&&(n+=r.TERMINAL_PREFIX);for(var s=t.nodeProps(e),u=0;u<s.length;u++){var f=s[u];if("number"!=typeof e[f])if(t.syms[e[f]._n])n+=i+f+t.syms[e[f]._n],i="";else{var a=o.toAlphaCode(e._n-e[f]._n-1+t.symCount);e[f]._g&&a.length>=e[f]._g.length&&1===e[e[f]._g]?(a=e[f]._g,n+=i+f+a,i=r.STRING_SEP):(n+=i+f+a,i="")}else n+=i+f,i=r.STRING_SEP}return n},u=function t(e,n){if(!e.visited(n))for(var i=e.nodeProps(n,!0),s=0;s<i.length;s++){var u=i[s],f=n._n-n[u]._n-1;f<r.BASE&&e.histRel.add(f),e.histAbs.add(n[u]._n,o.toAlphaCode(f).length-1),t(e,n[u])}},f=function(t){t.histAbs=t.histAbs.highest(r.BASE);var e=[];e[-1]=0;for(var n=0,i=0,s=3+o.toAlphaCode(t.nodeCount).length,u=0;u<r.BASE&&void 0!==t.histAbs[u];u++)e[u]=t.histAbs[u][1]-s-t.histRel.countOf(r.BASE-u-1)+e[u-1],e[u]>=n&&(n=e[u],i=u+1);return i},a=function t(e,n){if(void 0===n._n){for(var i=e.nodeProps(n,!0),r=0;r<i.length;r++)t(e,n[i[r]]);n._n=e.pos++,e.nodes.unshift(n)}},c=function(t){t.nodes=[],t.nodeCount=0,t.syms={},t.symCount=0,t.pos=0,t.optimize(),t.histAbs=new i,t.histRel=new i,a(t,t.root),t.nodeCount=t.nodes.length,t.prepDFS(),u(t,t.root),t.symCount=f(t);for(var e=0;e<t.symCount;e++)t.syms[t.histAbs[e][0]]=o.toAlphaCode(e);for(var n=0;n<t.nodeCount;n++)t.nodes[n]=s(t,t.nodes[n]);for(var c=t.symCount-1;c>=0;c--)t.nodes.unshift(o.toAlphaCode(c)+":"+o.toAlphaCode(t.nodeCount-t.histAbs[c][0]-1));return t.nodes.join(r.NODE_SEP)};e.exports=c},{"../config":1,"../encoding":2,"./histogram":5}],8:[function(t,e,n){"use strict";function i(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}var r="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},o=function(){function t(t,e){for(var n=0;n<e.length;n++){var i=e[n];i.enumerable=i.enumerable||!1,i.configurable=!0,"value"in i&&(i.writable=!0),Object.defineProperty(t,i.key,i)}}return function(e,n,i){return n&&t(e.prototype,n),i&&t(e,i),e}}(),s=t("./fns"),u=t("./pack"),f=t("../config"),a=function(){function t(e){i(this,t),this.root={},this.lastWord="",this.suffixes={},this.suffixCounts={},this.cNext=1,this.wordCount=0,this.insertWords(e),this.vCur=0}return o(t,[{key:"insertWords",value:function(t){if(void 0!==t){"string"==typeof t&&(t=t.split(/[^a-zA-Z]+/));for(var e=0;e<t.length;e++)t[e]=t[e].toLowerCase();s.unique(t);for(var n=0;n<t.length;n++)null===t[n].match(f.NOT_ALLOWED)&&this.insert(t[n])}}},{key:"insert",value:function(t){this._insert(t,this.root);var e=this.lastWord;this.lastWord=t;var n=s.commonPrefix(t,e);if(n!==e){var i=this.uniqueNode(e,t,this.root);i&&this.combineSuffixNode(i)}}},{key:"_insert",value:function(t,e){var n=void 0,i=void 0;if(0!==t.length){for(var o=Object.keys(e),u=0;u<o.length;u++){var f=o[u];if(n=s.commonPrefix(t,f),0!==n.length){if(f===n&&"object"===r(e[f]))return void this._insert(t.slice(n.length),e[f]);if(f===t&&"number"==typeof e[f])return;return i={},i[f.slice(n.length)]=e[f],this.addTerminal(i,t=t.slice(n.length)),delete e[f],e[n]=i,void this.wordCount++}}this.addTerminal(e,t),this.wordCount++}}},{key:"addTerminal",value:function(t,e){if(e.length<=1)return void(t[e]=1);var n={};t[e[0]]=n,this.addTerminal(n,e.slice(1))}},{key:"nodeProps",value:function(t,e){var n=[];for(var i in t)""!==i&&"_"!==i[0]&&(e&&"object"!==r(t[i])||n.push(i));return n.sort(),n}},{key:"optimize",value:function(){this.combineSuffixNode(this.root),this.prepDFS(),this.countDegree(this.root),this.prepDFS(),this.collapseChains(this.root)}},{key:"combineSuffixNode",value:function(t){if(t._c)return t;var e=[];this.isTerminal(t)&&e.push("!");for(var n=this.nodeProps(t),i=0;i<n.length;i++){var o=n[i];"object"===r(t[o])?(t[o]=this.combineSuffixNode(t[o]),e.push(o),e.push(t[o]._c)):e.push(o)}e=e.join("-");var s=this.suffixes[e];return s?s:(this.suffixes[e]=t,t._c=this.cNext++,t)}},{key:"prepDFS",value:function(){this.vCur++}},{key:"visited",value:function(t){return t._v===this.vCur||(t._v=this.vCur,!1)}},{key:"countDegree",value:function(t){if(void 0===t._d&&(t._d=0),t._d++,!this.visited(t))for(var e=this.nodeProps(t,!0),n=0;n<e.length;n++)this.countDegree(t[e[n]])}},{key:"collapseChains",value:function(t){var e=void 0,n=void 0,i=void 0,o=void 0;if(!this.visited(t)){for(n=this.nodeProps(t),o=0;o<n.length;o++)e=n[o],i=t[e],"object"===("undefined"==typeof i?"undefined":r(i))&&(this.collapseChains(i),void 0===i._g||1!==i._d&&1!==i._g.length||(delete t[e],e+=i._g,t[e]=i[i._g]));1!==n.length||this.isTerminal(t)||(t._g=e)}}},{key:"has",value:function(t){return this.isFragment(t,this.root)}},{key:"isTerminal",value:function(t){return!!t[""]}},{key:"isFragment",value:function(t,e){if(0===t.length)return this.isTerminal(e);if(1===e[t])return!0;for(var n=this.nodeProps(e,!0),i=0;i<n.length;i++){var r=n[i];if(r===t.slice(0,r.length))return this.isFragment(t.slice(r.length),e[r])}return!1}},{key:"uniqueNode",value:function(t,e,n){for(var i=this.nodeProps(n,!0),r=0;r<i.length;r++){var o=i[r];if(o===t.slice(0,o.length))return o!==e.slice(0,o.length)?n[o]:this.uniqueNode(t.slice(o.length),e.slice(o.length),n[o])}}},{key:"pack",value:function(){return u(this)}}]),t}();e.exports=a},{"../config":1,"./fns":4,"./pack":7}],9:[function(t,e,n){"use strict";var i=t("./ptrie"),r=function(t){return new i(t)};e.exports=r},{"./ptrie":11}],10:[function(t,e,n){"use strict";var i=function(t,e){if(t===e)return!0;var n=t.length;return!(n>=e.length)&&(1===n?t===e[0]:e.slice(0,n)===t)};e.exports=i},{}],11:[function(t,e,n){"use strict";function i(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}var r=function(){function t(t,e){for(var n=0;n<e.length;n++){var i=e[n];i.enumerable=i.enumerable||!1,i.configurable=!0,"value"in i&&(i.writable=!0),Object.defineProperty(t,i.key,i)}}return function(e,n,i){return n&&t(e.prototype,n),i&&t(e,i),e}}(),o=t("../encoding"),s=t("./prefix"),u=t("./unravel"),f=function(){function t(e){i(this,t),this.nodes=e.split(";"),this.syms=[],this.symCount=0,this._cache=null,e.match(":")&&this.initSymbols()}return r(t,[{key:"initSymbols",value:function(){for(var t=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),e=0;e<this.nodes.length;e++){var n=t.exec(this.nodes[e]);if(!n){this.symCount=e;break}this.syms[o.fromAlphaCode(n[1])]=o.fromAlphaCode(n[2])}this.nodes=this.nodes.slice(this.symCount,this.nodes.length)}},{key:"has",value:function(t){var e=this;if(!t)return!1;if(this._cache)return this._cache[t]||!1;var n=function n(i,r){var o=e.nodes[i];if("!"===o[0]){if(r===t)return!0;o=o.slice(1)}for(var u=o.split(/([A-Z0-9,]+)/g),f=0;f<u.length;f+=2){var a=u[f],c=u[f+1];if(a){var h=r+a;if(","!==c&&void 0!==c){if(s(h,t))return i=e.indexFromRef(c,i),n(i,h)}else if(h===t)return!0}}return!1};return n(0,"")}},{key:"indexFromRef",value:function(t,e){var n=o.fromAlphaCode(t);return n<this.symCount?this.syms[n]:e+n+1-this.symCount}},{key:"toArray",value:function(){return Object.keys(this.toObject())}},{key:"toObject",value:function(){return this._cache?this._cache:u(this)}},{key:"cache",value:function(){this._cache=u(this),this.nodes=null,this.syms=null}}]),t}();e.exports=f},{"../encoding":2,"./prefix":10,"./unravel":12}],12:[function(t,e,n){"use strict";var i=function(t){var e={},n=function n(i,r){var o=t.nodes[i];"!"===o[0]&&(e[r]=!0,o=o.slice(1));for(var s=o.split(/([A-Z0-9,]+)/g),u=0;u<s.length;u+=2){var f=s[u],a=s[u+1];if(f){var c=r+f;if(","!==a&&void 0!==a){var h=t.indexFromRef(a,i);n(h,c)}else e[c]=!0}}};return n(0,""),e};e.exports=i},{}]},{},[3])(3)}); | ||
/* efrt trie-compression v0.0.6 github.com/nlp-compromise/efrt - MIT */ | ||
!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{var n;n="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,n.efrt=t()}}(function(){var t;return function t(n,e,i){function o(s,u){if(!e[s]){if(!n[s]){var f="function"==typeof require&&require;if(!u&&f)return f(s,!0);if(r)return r(s,!0);var c=new Error("Cannot find module '"+s+"'");throw c.code="MODULE_NOT_FOUND",c}var h=e[s]={exports:{}};n[s][0].call(h.exports,function(t){var e=n[s][1][t];return o(e?e:t)},h,h.exports,t,n,e,i)}return e[s].exports}for(var r="function"==typeof require&&require,s=0;s<i.length;s++)o(i[s]);return o}({1:[function(t,n,e){"use strict";n.exports={NODE_SEP:";",STRING_SEP:",",TERMINAL_PREFIX:"!",NOT_ALLOWED:new RegExp("[0-9A-Z,;!]"),BASE:36}},{}],2:[function(t,n,e){"use strict";var i=36,o="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",r=o.split("").reduce(function(t,n,e){return t[n]=e,t},{}),s=function(t){if(void 0!==o[t])return o[t];for(var n=1,e=i,r="";t>=e;t-=e,n++,e*=i);for(;n--;){var s=t%i;r=String.fromCharCode((s<10?48:55)+s)+r,t=(t-s)/i}return r},u=function(t){if(void 0!==r[t])return r[t];for(var n=0,e=1,o=i,s=1;e<t.length;n+=o,e++,o*=i);for(var u=t.length-1;u>=0;u--,s*=i){var f=t.charCodeAt(u)-48;f>10&&(f-=7),n+=f*s}return n};n.exports={toAlphaCode:s,fromAlphaCode:u}},{}],3:[function(n,e,i){(function(i){"use strict";var o={pack:n("./pack/index"),unpack:n("./unpack/index")};"undefined"!=typeof self?self.efrt=o:"undefined"!=typeof window?window.efrt=o:"undefined"!=typeof i&&(i.efrt=o),"function"==typeof t&&t.amd&&t(o),"undefined"!=typeof e&&(e.exports=o)}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./pack/index":6,"./unpack/index":10}],4:[function(t,n,e){"use strict";var i=function(t,n){for(var e=Math.min(t.length,n.length);e>0;){var i=t.slice(0,e);if(i===n.slice(0,e))return i;e-=1}return""},o=function(t){t.sort();for(var n=1;n<t.length;n++)t[n-1]===t[n]&&t.splice(n,1)};n.exports={commonPrefix:i,unique:o}},{}],5:[function(t,n,e){"use strict";function i(t,n){if(!(t instanceof n))throw new TypeError("Cannot call a class as a function")}var o=function(){function t(t,n){for(var e=0;e<n.length;e++){var i=n[e];i.enumerable=i.enumerable||!1,i.configurable=!0,"value"in i&&(i.writable=!0),Object.defineProperty(t,i.key,i)}}return function(n,e,i){return e&&t(n.prototype,e),i&&t(n,i),n}}(),r=function(){function t(){i(this,t),this.counts={}}return o(t,[{key:"init",value:function(t){void 0===this.counts[t]&&(this.counts[t]=0)}},{key:"add",value:function(t,n){void 0===n&&(n=1),this.init(t),this.counts[t]+=n}},{key:"change",value:function(t,n,e){void 0===e&&(e=1),this.add(n,-e),this.add(t,e)}},{key:"countOf",value:function(t){return this.init(t),this.counts[t]}},{key:"highest",value:function(t){for(var n=[],e=Object.keys(this.counts),i=0;i<e.length;i++){var o=e[i];n.push([o,this.counts[o]])}return n.sort(function(t,n){return n[1]-t[1]}),t&&(n=n.slice(0,t)),n}}]),t}();n.exports=r},{}],6:[function(t,n,e){"use strict";var i=t("./trie"),o=function(t){var n=new i(t);return n.pack()};n.exports=o},{"./trie":9}],7:[function(t,n,e){"use strict";var i="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},o=t("./fns"),r=t("./pack"),s=t("../config");n.exports={insertWords:function(t){if(void 0!==t){"string"==typeof t&&(t=t.split(/[^a-zA-Z]+/));for(var n=0;n<t.length;n++)t[n]=t[n].toLowerCase();o.unique(t);for(var e=0;e<t.length;e++)null===t[e].match(s.NOT_ALLOWED)&&this.insert(t[e])}},insert:function(t){this._insert(t,this.root);var n=this.lastWord;this.lastWord=t;var e=o.commonPrefix(t,n);if(e!==n){var i=this.uniqueNode(n,t,this.root);i&&this.combineSuffixNode(i)}},_insert:function(t,n){var e=void 0,r=void 0;if(0!==t.length){for(var s=Object.keys(n),u=0;u<s.length;u++){var f=s[u];if(e=o.commonPrefix(t,f),0!==e.length){if(f===e&&"object"===i(n[f]))return void this._insert(t.slice(e.length),n[f]);if(f===t&&"number"==typeof n[f])return;return r={},r[f.slice(e.length)]=n[f],this.addTerminal(r,t=t.slice(e.length)),delete n[f],n[e]=r,void this.wordCount++}}this.addTerminal(n,t),this.wordCount++}},addTerminal:function(t,n){if(n.length<=1)return void(t[n]=1);var e={};t[n[0]]=e,this.addTerminal(e,n.slice(1))},nodeProps:function(t,n){var e=[];for(var o in t)""!==o&&"_"!==o[0]&&(n&&"object"!==i(t[o])||e.push(o));return e.sort(),e},optimize:function(){this.combineSuffixNode(this.root),this.prepDFS(),this.countDegree(this.root),this.prepDFS(),this.collapseChains(this.root)},combineSuffixNode:function(t){if(t._c)return t;var n=[];this.isTerminal(t)&&n.push("!");for(var e=this.nodeProps(t),o=0;o<e.length;o++){var r=e[o];"object"===i(t[r])?(t[r]=this.combineSuffixNode(t[r]),n.push(r),n.push(t[r]._c)):n.push(r)}n=n.join("-");var s=this.suffixes[n];return s?s:(this.suffixes[n]=t,t._c=this.cNext++,t)},prepDFS:function(){this.vCur++},visited:function(t){return t._v===this.vCur||(t._v=this.vCur,!1)},countDegree:function(t){if(void 0===t._d&&(t._d=0),t._d++,!this.visited(t))for(var n=this.nodeProps(t,!0),e=0;e<n.length;e++)this.countDegree(t[n[e]])},collapseChains:function(t){var n=void 0,e=void 0,o=void 0,r=void 0;if(!this.visited(t)){for(e=this.nodeProps(t),r=0;r<e.length;r++)n=e[r],o=t[n],"object"===("undefined"==typeof o?"undefined":i(o))&&(this.collapseChains(o),void 0===o._g||1!==o._d&&1!==o._g.length||(delete t[n],n+=o._g,t[n]=o[o._g]));1!==e.length||this.isTerminal(t)||(t._g=n)}},has:function(t){return this.isFragment(t,this.root)},isTerminal:function(t){return!!t[""]},isFragment:function(t,n){if(0===t.length)return this.isTerminal(n);if(1===n[t])return!0;for(var e=this.nodeProps(n,!0),i=0;i<e.length;i++){var o=e[i];if(o===t.slice(0,o.length))return this.isFragment(t.slice(o.length),n[o])}return!1},uniqueNode:function(t,n,e){for(var i=this.nodeProps(e,!0),o=0;o<i.length;o++){var r=i[o];if(r===t.slice(0,r.length))return r!==n.slice(0,r.length)?e[r]:this.uniqueNode(t.slice(r.length),n.slice(r.length),e[r])}},pack:function(){return r(this)}}},{"../config":1,"./fns":4,"./pack":8}],8:[function(t,n,e){"use strict";var i=t("./histogram"),o=t("../config"),r=t("../encoding"),s=function(t,n){var e="",i="";t.isTerminal(n)&&(e+=o.TERMINAL_PREFIX);for(var s=t.nodeProps(n),u=0;u<s.length;u++){var f=s[u];if("number"!=typeof n[f])if(t.syms[n[f]._n])e+=i+f+t.syms[n[f]._n],i="";else{var c=r.toAlphaCode(n._n-n[f]._n-1+t.symCount);n[f]._g&&c.length>=n[f]._g.length&&1===n[n[f]._g]?(c=n[f]._g,e+=i+f+c,i=o.STRING_SEP):(e+=i+f+c,i="")}else e+=i+f,i=o.STRING_SEP}return e},u=function t(n,e){if(!n.visited(e))for(var i=n.nodeProps(e,!0),s=0;s<i.length;s++){var u=i[s],f=e._n-e[u]._n-1;f<o.BASE&&n.histRel.add(f),n.histAbs.add(e[u]._n,r.toAlphaCode(f).length-1),t(n,e[u])}},f=function(t){t.histAbs=t.histAbs.highest(o.BASE);var n=[];n[-1]=0;for(var e=0,i=0,s=3+r.toAlphaCode(t.nodeCount).length,u=0;u<o.BASE&&void 0!==t.histAbs[u];u++)n[u]=t.histAbs[u][1]-s-t.histRel.countOf(o.BASE-u-1)+n[u-1],n[u]>=e&&(e=n[u],i=u+1);return i},c=function t(n,e){if(void 0===e._n){for(var i=n.nodeProps(e,!0),o=0;o<i.length;o++)t(n,e[i[o]]);e._n=n.pos++,n.nodes.unshift(e)}},h=function(t){t.nodes=[],t.nodeCount=0,t.syms={},t.symCount=0,t.pos=0,t.optimize(),t.histAbs=new i,t.histRel=new i,c(t,t.root),t.nodeCount=t.nodes.length,t.prepDFS(),u(t,t.root),t.symCount=f(t);for(var n=0;n<t.symCount;n++)t.syms[t.histAbs[n][0]]=r.toAlphaCode(n);for(var e=0;e<t.nodeCount;e++)t.nodes[e]=s(t,t.nodes[e]);for(var h=t.symCount-1;h>=0;h--)t.nodes.unshift(r.toAlphaCode(h)+":"+r.toAlphaCode(t.nodeCount-t.histAbs[h][0]-1));return t.nodes.join(o.NODE_SEP)};n.exports=h},{"../config":1,"../encoding":2,"./histogram":5}],9:[function(t,n,e){"use strict";var i=t("./methods"),o=function(t){this.root={},this.lastWord="",this.suffixes={},this.suffixCounts={},this.cNext=1,this.wordCount=0,this.insertWords(t),this.vCur=0};Object.keys(i).forEach(function(t){o.prototype[t]=i[t]}),n.exports=o},{"./methods":7}],10:[function(t,n,e){"use strict";var i=t("./ptrie");n.exports=function(t){return new i(t)}},{"./ptrie":13}],11:[function(t,n,e){"use strict";var i=t("../encoding"),o=t("./prefix"),r=t("./unravel"),s={has:function(t){if(!t)return!1;if(this._cache)return this._cache[t]||!1;var n=this,e=function e(i,r){var s=n.nodes[i];if("!"===s[0]){if(r===t)return!0;s=s.slice(1)}for(var u=s.split(/([A-Z0-9,]+)/g),f=0;f<u.length;f+=2){var c=u[f],h=u[f+1];if(c){var a=r+c;if(","!==h&&void 0!==h){if(o(a,t))return i=n.indexFromRef(h,i),e(i,a)}else if(a===t)return!0}}return!1};return e(0,"")},indexFromRef:function(t,n){var e=i.fromAlphaCode(t);return e<this.symCount?this.syms[e]:n+e+1-this.symCount},toArray:function(){return Object.keys(this.toObject())},toObject:function(){return this._cache?this._cache:r(this)},cache:function(){this._cache=r(this),this.nodes=null,this.syms=null}};n.exports=s},{"../encoding":2,"./prefix":12,"./unravel":15}],12:[function(t,n,e){"use strict";n.exports=function(t,n){if(t===n)return!0;var e=t.length;return!(e>=n.length)&&(1===e?t===n[0]:n.slice(0,e)===t)}},{}],13:[function(t,n,e){"use strict";var i=t("./symbols"),o=t("./methods"),r=function(t){this.nodes=t.split(";"),this.syms=[],this.symCount=0,this._cache=null,t.match(":")&&i(this)};Object.keys(o).forEach(function(t){r.prototype[t]=o[t]}),n.exports=r},{"./methods":11,"./symbols":14}],14:[function(t,n,e){"use strict";var i=t("../encoding");n.exports=function(t){for(var n=new RegExp("([0-9A-Z]+):([0-9A-Z]+)"),e=0;e<t.nodes.length;e++){var o=n.exec(t.nodes[e]);if(!o){t.symCount=e;break}t.syms[i.fromAlphaCode(o[1])]=i.fromAlphaCode(o[2])}t.nodes=t.nodes.slice(t.symCount,t.nodes.length)}},{"../encoding":2}],15:[function(t,n,e){"use strict";n.exports=function(t){var n={},e=function e(i,o){var r=t.nodes[i];"!"===r[0]&&(n[o]=!0,r=r.slice(1));for(var s=r.split(/([A-Z0-9,]+)/g),u=0;u<s.length;u+=2){var f=s[u],c=s[u+1];if(f){var h=o+f;if(","!==c&&void 0!==c){var a=t.indexFromRef(c,i);e(a,h)}else n[h]=!0}}};return e(0,""),n}},{}]},{},[3])(3)}); |
@@ -5,3 +5,3 @@ { | ||
"description": "compressed-trie data-structure", | ||
"version": "0.0.5", | ||
"version": "0.0.6", | ||
"main": "./builds/efrt.js", | ||
@@ -22,2 +22,4 @@ "repository": { | ||
"devDependencies": { | ||
"rollup-plugin-commonjs": "^8.0.2", | ||
"rollup-plugin-node-resolve": "^3.0.0", | ||
"babel-plugin-transform-es3-member-expression-literals": "^6.22.0", | ||
@@ -24,0 +26,0 @@ "babel-plugin-transform-es3-property-literals": "^6.22.0", |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
62200
8
1490
15