Socket
Socket
Sign inDemoInstall

suffix-thumb

Package Overview
Dependencies
Maintainers
1
Versions
17
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

suffix-thumb - npm Package Compare versions

Comparing version 3.1.5 to 4.0.0

builds/types.d.ts

2

builds/suffix-thumb-client.js

@@ -1,1 +0,1 @@

import{unpack as e}from"efrt";const t=/^.([0-9]+)/,n=function(e,n){if(n.exceptions.hasOwnProperty(e))return function(e,n){let r=n.exceptions[e],o=r.match(t);if(null===o)return n.exceptions[e];let l=Number(o[1])||0;return e.substr(0,l)+r.replace(t,"")}(e,n);const r=function(e,t){let n=e[e.length-1],r=t.rules[n]||[];return 0===r.length&&(r=t.rules[""]||r),r}(e,n);for(let t=0;t<r.length;t+=1){let n=r[t][0];if(e.endsWith(n)){let o=new RegExp(n+"$");return e.replace(o,r[t][1])}}return e},r=function(e,t={}){let n={},r={};return e=e.filter((e=>void 0!==n[e[0]]?(t.verbose&&(console.warn("Duplicate left side:"),console.log(" 1.",[e[0],n[e[0]]]),console.log(" 2.",e)),!1):void 0!==r[e[1]]?(t.verbose&&(console.warn("Duplicate right side:"),console.log(" 1.",[r[e[1]],e[1]]),console.log(" 2.",e)),!1===t.inverse):(n[e[0]]=e[1],r[e[1]]=e[0],!0)))},o=function(e){let t={};return e.forEach((e=>{let n=e[0]||"",r=n[n.length-1]||"";t[r]=t[r]||[],t[r].push(e)})),t},l=function(e){return e=e.sort(((e,t)=>e[0].length>t[0].length?-1:e[0].length<t[0].length?1:0))},s=/^.([0-9]+)/,c=function(e){return Object.keys(e).forEach((t=>{let n=e[t],r=n.match(s);if(null!==r){let o=Number(r[1])||0,l=t.substring(0,o)+n.replace(s,"");e[t]=l}})),e},i=function(t={}){var n;return"string"==typeof t.exceptions&&(t.exceptions=e(t.exceptions),t.exceptions=c(t.exceptions)),"string"==typeof t.rules&&(t.rules=(n=t.rules)?(n=e(n),n=c(n),n=Object.entries(n),n=l(n),n=o(n)):{}),t},u=function(e){let t=[];var n;return Object.keys(e.rules).forEach((n=>{t=t.concat(e.rules[n].map((e=>[e[1],e[0]])))})),t=l(t),{rules:o(t),exceptions:(n=e.exceptions,Object.entries(n).reduce(((e,t)=>(e[t[1]]=t[0],e)),{}))}};export{n as convert,u as reverse,i as uncompress,r as validate};
const e=/^.([0-9]+)/,t=function(t,n,r){if(n.exceptions.hasOwnProperty(t))return r&&console.log("exception, ",t,n.exceptions[t]),function(t,n){let r=n.exceptions[t],l=r.match(e);if(null===l)return n.exceptions[t];let o=Number(l[1])||0;return t.substr(0,o)+r.replace(e,"")}(t,n);let l=n.rules;n.reversed&&(l=n.rev),l=function(e,t={}){let n=t[e[e.length-1]]||[];return t[""]&&(n=n.concat(t[""])),n}(t,l);for(let e=0;e<l.length;e+=1){let n=l[e][0];if(t.endsWith(n)){r&&console.log("rule, ",l[e]);let o=new RegExp(n+"$");return t.replace(o,l[e][1])}}return r&&console.log(" x - "+t),t},n=function(e,t={}){let n={},r={};return e=e.filter((e=>void 0!==n[e[0]]?(t.debug&&(console.warn("Duplicate left side:"),console.log(" 1.",[e[0],n[e[0]]]),console.log(" 2.",e)),!1):void 0!==r[e[1]]?(t.debug&&(console.warn("Duplicate right side:"),console.log(" 1.",[r[e[1]],e[1]]),console.log(" 2.",e)),!1===t.inverse):(n[e[0]]=e[1],r[e[1]]=e[0],!0)))},r=function(e){let t={};return e.forEach((e=>{let n=e[0]||"",r=n[n.length-1]||"";t[r]=t[r]||[],t[r].push(e)})),t},l=/^([0-9]+)/,o=function(e){const t=/\|/;return e.split(/,/).map((e=>{let n=e.split(t);return function(e="",t=""){let n=(t=String(t)).match(l);if(null===n)return[e,t];let r=Number(n[1])||0,o=e.substring(0,r);return[e,o+t.replace(l,"")]}(n[0],n[1])}))},s=function(e={}){return(e=Object.assign({},e)).rules=o(e.rules),e.rules=r(e.rules),e.rev=o(e.rev),e.rev=r(e.rev),e.exceptions=o(e.exceptions),e.exceptions=e.exceptions.reduce(((e,t)=>(e[t[0]]=t[1],e)),{}),e},u=function(e){let{rules:t,exceptions:n,rev:r}=e;var l;return l=n,n=Object.entries(l).reduce(((e,t)=>(e[t[1]]=t[0],e)),{}),{reversed:!Boolean(e.reversed),rules:t,exceptions:n,rev:r}},i=function(e,t){return t[e[e.length-1]]||[]},c=function(e,t,n){const r="Left",l="Right";if(t.exceptions.hasOwnProperty(e))return r;let o=Object.entries(t.exceptions);for(let t=0;t<o.length;t+=1)if(o[t][1]===e)return l;let s=i(e,t.rules);for(let t=0;t<s.length;t+=1)if(e.endsWith(s[t][0]))return r;s=i(e,t.rev);for(let t=0;t<s.length;t+=1)if(e.endsWith(s[t][0]))return l;s=i(e,t.rules);for(let t=0;t<s.length;t+=1)if(e.endsWith(s[t][1]))return l;s=i(e,t.rev);for(let t=0;t<s.length;t+=1)if(e.endsWith(s[t][1]))return r;return null};export{c as classify,t as convert,u as reverse,s as uncompress,n as validate};

@@ -1,7 +0,7 @@

/* suffix-thumb 3.1.5 MIT */
/* suffix-thumb 4.0.0 MIT */
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('efrt')) :
typeof define === 'function' && define.amd ? define(['exports', 'efrt'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.suffixThumb = {}, global.efrt));
})(this, (function (exports, efrt) { 'use strict';
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.suffixThumb = {}));
})(this, (function (exports) { 'use strict';

@@ -25,22 +25,33 @@ const prefix$1 = /^.([0-9]+)/;

// get suffix-rules according to last char of word
const getRules$1 = function (word, model) {
const getRules$1 = function (word, rules = {}) {
let char = word[word.length - 1];
let rules = model.rules[char] || [];
if (rules.length === 0) {
// do we have a generic suffix?
rules = model.rules[''] || rules;
let list = rules[char] || [];
// do we have a generic suffix?
if (rules['']) {
list = list.concat(rules['']);
}
return rules
return list
};
const convert = function (word, model) {
const convert = function (word, model, debug) {
// check list of irregulars
if (model.exceptions.hasOwnProperty(word)) {
if (debug) {
console.log("exception, ", word, model.exceptions[word]);
}
return getKeyVal(word, model)
}
// if model is reversed, try rev rules
let rules = model.rules;
if (model.reversed) {
rules = model.rev;
}
// try suffix rules
const rules = getRules$1(word, model);
rules = getRules$1(word, rules);
for (let i = 0; i < rules.length; i += 1) {
let suffix = rules[i][0];
if (word.endsWith(suffix)) {
if (debug) {
console.log("rule, ", rules[i]);
}
let reg = new RegExp(suffix + '$');

@@ -50,2 +61,5 @@ return word.replace(reg, rules[i][1])

}
if (debug) {
console.log(' x - ' + word);
}
// return the original word unchanged

@@ -55,299 +69,290 @@ return word

const max = 6;
const getSuffixes = function (str = '') {
let list = [];
for (let i = max; i >= 0; i -= 1) {
if (str.length - 1 <= i) {
continue
// longest common prefix
const findOverlap = (from, to) => {
let all = [];
for (let i = 0; i < from.length; i += 1) {
if (from[i] === to[i]) {
all.push(from[i]);
} else {
break
}
let n = str.length - i - 1;
let suffix = str.substring(n, n + str.length - 1);
list.push(suffix);
}
return list
return all.join('')
};
const getAll = function (arr) {
const suffixes = {};
arr.forEach((a) => {
let [from, to] = a;
let fromList = getSuffixes(from);
fromList.push(''); //add a prepend-only option
fromList.forEach((left) => {
suffixes[left] = suffixes[left] || {};
let toList = getSuffixes(to);
toList.forEach((right) => {
suffixes[left][right] = suffixes[left][right] || 0;
suffixes[left][right] += 1;
});
});
let compress$1 = function (key, val) {
let prefix = findOverlap(key, val);
if (prefix.length < 1) {
return [key, val]
}
let out = prefix.length + val.substr(prefix.length);
return [key, out]
};
// console.log(compress('fixture', 'fixturing'))
// index rules by last-char
const indexRules = function (rules) {
let byChar = {};
rules.forEach((a) => {
let suff = a[0] || '';
let char = suff[suff.length - 1] || '';
byChar[char] = byChar[char] || [];
byChar[char].push(a);
});
return suffixes
return byChar
};
const topChange = function (obj, from) {
let keys = Object.keys(obj);
let arr = keys.map((to) => {
return {
from: from,
to: to,
yes: obj[to],
}
const unIndex = function (byChar) {
let arr = [];
Object.keys(byChar).forEach(k => {
arr = arr.concat(byChar[k]);
});
arr = arr.sort((a, b) => {
if (a.yes > b.yes) {
return -1
} else if (a.yes < b.yes) {
return 1
}
return 0
});
return arr
};
const findBest = function (suffixes) {
let good = [];
Object.keys(suffixes).forEach((left) => {
let top = topChange(suffixes[left], left);
if (top[0] && top[0].yes > 1) {
good.push(top[0]);
}
// remove shared data in key-val pairs
// uses an ad-hoc run-length encoding format
// {walk: walking} -> {walk: '.4ing'}
const pressPairs = function (pairs) {
pairs = pairs.map(a => {
return compress$1(a[0], a[1]).join('|')
});
good = good.sort((a, b) => {
if (a.yes > b.yes) {
return -1
} else if (a.yes < b.yes) {
return 1
}
return 0
});
return good
return pairs.join(',')
};
const getScores = function (arr, pairs) {
return arr.map((obj) => {
let yes = 0;
let no = 0;
let exceptions = {};
pairs.forEach((pair) => {
if (pair[0].endsWith(obj.from)) {
let reg = new RegExp(obj.from + '$');//unsafe
let have = pair[0].replace(reg, obj.to);
if (have === pair[1]) {
yes += 1;
} else {
no += 1;
exceptions[pair[0]] = pair[1];
}
}
});
return {
from: obj.from,
to: obj.to,
yes: yes,
no: no,
percent: yes / (yes + no),
exceptions: exceptions,
}
})
const compress = function (model = {}) {
model = Object.assign({}, model);
// compress fwd rules
model.rules = unIndex(model.rules);
model.rules = pressPairs(model.rules);
// compress reverse rules
model.rev = unIndex(model.rev);
model.rev = pressPairs(model.rev);
// compress exceptions
model.exceptions = Object.entries(model.exceptions);
model.exceptions = pressPairs(model.exceptions);
return model
};
const rank = function (arr, pairs) {
let scored = getScores(arr, pairs);
// baseline filter
scored = scored.filter((o) => {
return o.yes > 1 && o.yes > o.no
});
// sort by # of positive
scored = scored.sort((a, b) => {
if (a.yes > b.yes) {
return -1
} else if (a.yes < b.yes) {
return 1
}
return 0
});
return scored
const prefix = /^([0-9]+)/;
const expand = function (key = '', val = '') {
val = String(val);
let m = val.match(prefix);
if (m === null) {
return [key, val]
}
let num = Number(m[1]) || 0;
let pre = key.substring(0, num);
let full = pre + val.replace(prefix, '');
return [key, full]
};
// remove any redundant rules
const squeeze = function (arr) {
let redundant = {};
arr.forEach((o, i) => {
let downstream = arr.slice(i + 1, arr.length);
downstream.forEach((d) => {
if (d.from.endsWith(o.from)) {
// also ensure the surviving one has no exceptions
if (d.no === 0) {
redundant[d.from] = true;
}
}
});
});
// actually remove any redundant suffixes
arr = arr.filter((o) => {
return redundant.hasOwnProperty(o.from) === false
});
return arr
const toArray = function (txt) {
const pipe = /\|/;
return txt.split(/,/).map(str => {
let a = str.split(pipe);
return expand(a[0], a[1])
})
};
function reverse$1(str) {
return str.split('').reverse().join('')
}
const uncompress = function (model = {}) {
model = Object.assign({}, model);
const fmtRules = function (rules) {
// sort by length, then by suffix
rules = rules.sort((a, b) => {
if (a.from.length > b.from.length) {
return -1
} else if (a.from.length < b.from.length) {
return 1
}
a = reverse$1(a.from);
b = reverse$1(b.from);
if (a > b) {
return 1
} else if (a < b) {
return -1
}
return 0
});
return rules.map((o) => [o.from, o.to])
// compress fwd rules
model.rules = toArray(model.rules);
model.rules = indexRules(model.rules);
// compress reverse rules
model.rev = toArray(model.rev);
model.rev = indexRules(model.rev);
// compress exceptions
model.exceptions = toArray(model.exceptions);
model.exceptions = model.exceptions.reduce((h, a) => {
h[a[0]] = a[1];
return h
}, {});
return model
};
const format = function (rules, pairs) {
let exceptions = {};
rules.forEach((rule) => {
Object.assign(exceptions, rule.exceptions);
});
rules = fmtRules(rules);
// console.log(expand('fixture', '6ing'))
// console.log(toArray('heard|4'))
// find remaining pairs with no rule
let remaining = pairs.filter((pair) => {
if (exceptions.hasOwnProperty(pair[0])) {
return false
}
// console.log(rules.find((rule) => pair[0].endsWith(rule.from)))
if (rules.find((rule) => pair[0].endsWith(rule.from))) {
return false
}
return true
});
// add them to exceptions list
remaining.forEach(a => {
exceptions[a[0]] = a[1];
});
const reverseObj = function (obj) {
return Object.entries(obj).reduce((h, a) => {
h[a[1]] = a[0];
return h
}, {})
};
const reverse = function (model) {
let { rules, exceptions, rev } = model;
exceptions = reverseObj(exceptions);
return {
reversed: !Boolean(model.reversed),//toggle this
rules,
exceptions: exceptions,
exceptions,
rev
}
};
const firstPass = function (pairs) {
pairs = pairs.filter((a) => a && a[0] && a[1]);
// look at all patterns
const suffixes = getAll(pairs);
// look for the greatest patterns
let best = findBest(suffixes);
// run pattern against the pairs
let rules = rank(best, pairs);
// console.log(rules)
// remove duplicates
rules = squeeze(rules);
// nice result format
let res = format(rules, pairs);
// console.log(res)
return res
// approximate file-size of given text
const fileSize = (txt) => {
if (!txt) {
return '0kb'
}
if (typeof txt === 'object') {
txt = JSON.stringify(txt);
}
let unit = 'kb';
let num = Buffer.byteLength(txt, 'utf8');
num = num / 1000;
if (num > 1000) {
unit = 'mb';
num = num / 1000;
}
num = Math.round(num * 10) / 10;//round it
return num.toLocaleString() + unit
};
const reduceExceptions = function (res) {
let final = {};
let { rules, exceptions } = res;
Object.keys(exceptions).forEach(k => {
let found = rules.find((rule) => {
return k.endsWith(rule[0])
});
// no rule applies
if (!found) {
final[k] = exceptions[k];
return
}
let tmp = k.replace(found[0], found[1]);
// did we do it wrong?
if (tmp !== exceptions[k]) {
final[k] = exceptions[k]; //still an exception then
}
});
return final
// get suffix-rules according to last char of word
const getRules = function (word, modelRules) {
let char = word[word.length - 1];
let rules = modelRules[char] || [];
// if (modelRules['']) {
// // do we have a generic suffix?
// rules = rules.concat(modelRules[''])
// }
return rules
};
const postProcess = function (res) {
// some exceptions are not anymore
res.exceptions = reduceExceptions(res);
return res
const classify = function (str, model, debug) {
const l = 'Left';
const r = 'Right';
// check known exceptions
if (model.exceptions.hasOwnProperty(str)) {
return l
}
let list = Object.entries(model.exceptions);
for (let i = 0; i < list.length; i += 1) {
if (list[i][1] === str) {
return r
}
}
// check rules
let rules = getRules(str, model.rules);
for (let i = 0; i < rules.length; i += 1) {
if (str.endsWith(rules[i][0])) {
return l
}
}
rules = getRules(str, model.rev);
for (let i = 0; i < rules.length; i += 1) {
if (str.endsWith(rules[i][0])) {
return r
}
}
// check weak-side of rules
rules = getRules(str, model.rules);
for (let i = 0; i < rules.length; i += 1) {
if (str.endsWith(rules[i][1])) {
return r
}
}
rules = getRules(str, model.rev);
for (let i = 0; i < rules.length; i += 1) {
if (str.endsWith(rules[i][1])) {
return l
}
}
return null
};
// index rules by last-char
const indexRules = function (rules) {
let byChar = {};
rules.forEach((a) => {
let suff = a[0] || '';
let char = suff[suff.length - 1] || '';
byChar[char] = byChar[char] || [];
byChar[char].push(a);
});
return byChar
};
const green = str => '\x1b[32m' + str + '\x1b[0m';
const red = str => '\x1b[31m' + str + '\x1b[0m';
const blue = str => '\x1b[34m' + str + '\x1b[0m';
const yellow = str => '\x1b[33m' + str + '\x1b[0m';
const dim = str => '\x1b[2m' + str + '\x1b[0m';
const unIndex = function (byChar) {
let arr = [];
Object.keys(byChar).forEach(k => {
arr = arr.concat(byChar[k]);
const testFwd = function (pairs, model) {
let wrong = 0;
pairs.forEach((a) => {
let created = convert(a[0], model);
if (created !== a[1]) {
wrong += 1;
console.log(red('error:'), yellow(a[0] + ' →' + created));
}
});
return arr
if (wrong === 0) {
console.log(green(` ✓ forward`));
} else {
console.log(red(` ✗ ${wrong} `) + 'errors\n');
}
return wrong
};
const sortRules = function (rules) {
rules = rules.sort((a, b) => {
if (a[0].length > b[0].length) {
return -1
} else if (a[0].length < b[0].length) {
return 1
const testBack = function (pairs, model) {
let wrong = 0;
let rev = reverse(model);
pairs.forEach((a) => {
let created = convert(a[1], rev);
if (created !== a[0]) {
wrong += 1;
console.log(red(' rev ✗: '), yellow(a[1] + ' → ' + created));
}
return 0
});
return rules
if (wrong === 0) {
console.log(green(` ✓ backward`));
} else {
console.log(red(` ✗ ${wrong} `) + 'errors reversed\n');
}
return wrong
};
// add all reverse-exceptions
const addInverse = function (model, pairs) {
// create a reverse model
let tmp = Object.assign({}, model);
tmp.rules = indexRules(model.rules);
let rev = reverse(tmp);
// look for exceptions
pairs.forEach(a => {
let [left, right] = a;
if (convert(right, rev) !== left) {
// console.log(a)
model.exceptions[a[0]] = a[1];
}
});
// console.log(convert('relearn', rev))
return model
const testSize = function (pairs, model) {
let before = fileSize(pairs);
let smol = compress(model);
let after = fileSize(smol);
console.log(` ${dim(before)} -> ${blue(after)}`);
};
const secondPass = function (res, pairs, opts) {
// remove redundant exceptions
res = postProcess(res);
// turn some exceptions into singleton suffix-rules
// res = toRules(res, pairs)
if (opts.inverse !== false) {
res = addInverse(res, pairs);
}
return res
const stats = function (model) {
let rules = 0;
Object.keys(model.rules).forEach(k => rules += model.rules[k].length);
let rev = 0;
Object.keys(model.rev || {}).forEach(k => rev += model.rev[k].length);
let exc = Object.keys(model.exceptions).length;
console.log(` ${blue(rules.toLocaleString())} rules, ${yellow(rev.toLocaleString())} reversed, ${blue(exc.toLocaleString())} exceptions`);
};
const test = function (pairs, opts) {
console.log('\n');
console.log(yellow(pairs.length.toLocaleString()) + ` pairs - ${dim(fileSize(pairs))}`);
let begin = new Date();
let model = learnBoth(pairs, opts);
let end = new Date();
console.log(' ', (end.getTime() - begin.getTime()) / 1000, 'seconds');
console.log(yellow('\nSize:'));
stats(model);
testSize(pairs, model);
model = compress(model);
model = uncompress(model);
console.log(yellow('\nForward:'));
testFwd(pairs, model);
console.log(yellow('\nBackward:'));
testBack(pairs, model);
// hmm
// console.log(yellow('\nClassify:'))
// testSide(pairs, model, 'Left')
// testSide(pairs, model, 'Right')
};
// make sure inputs are not impossible to square-up

@@ -359,3 +364,3 @@ const validate = function (pairs, opts = {}) {

if (left[a[0]] !== undefined) {
if (opts.verbose) {
if (opts.debug) {
console.warn('Duplicate left side:');

@@ -368,3 +373,3 @@ console.log(' 1.', [a[0], left[a[0]]]);

if (right[a[1]] !== undefined) {
if (opts.verbose) {
if (opts.debug) {
console.warn('Duplicate right side:');

@@ -386,134 +391,203 @@ console.log(' 1.', [right[a[1]], a[1]]);

const learn = function (pairs, opts = {}) {
// ensure input pairs are possible
pairs = validate(pairs, opts);
// create basic {rules, exceptions}
let res = firstPass(pairs);
// optimize it further
res = secondPass(res, pairs, opts);
// organize rules by their suffix char
res.rules = indexRules(res.rules);
return res
};
const max = 6;
// longest common prefix
const findOverlap = (from, to) => {
let all = [];
for (let i = 0; i < from.length; i += 1) {
if (from[i] === to[i]) {
all.push(from[i]);
} else {
break
const getSuffixes = function (str = '') {
let list = [];
for (let i = max; i >= 0; i -= 1) {
if (str.length - 1 <= i) {
continue
}
let n = str.length - i - 1;
let suffix = str.substring(n, n + str.length - 1);
list.push(suffix);
}
return all.join('')
return list.reverse()
};
// remove shared data in key-val pairs
// uses an ad-hoc run-length encoding format
// {walk: walking} -> {walk: '.4ing'}
const pressObj = function (obj) {
let res = {};
Object.keys(obj).forEach((k) => {
let val = obj[k];
let prefix = findOverlap(k, val);
if (prefix.length < 2) {
res[k] = val;
return
const getDiff = function (left, right, suff) {
suff = suff.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
let reg = new RegExp(suff + '$');
let stem = left.replace(reg, '');
if (!right.startsWith(stem)) {
return
}
stem = stem.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
let start = new RegExp('^' + stem);
let rest = right.replace(start, '');
return { from: suff, to: rest, id: suff + '|' + rest, reg }
};
const unique = function (arr) {
let set = new Set();
return arr.filter(a => {
if (set.has(a.id)) {
return false
}
let out = '.' + prefix.length + val.substr(prefix.length);
res[k] = out;
set.add(a.id);
return true
})
};
const getAll = function (arr) {
let res = [];
arr.forEach((a) => {
let [left, right] = a;
let list = getSuffixes(left);
list.forEach(suff => {
let diff = getDiff(left, right, suff);
if (diff) {
res.push(diff);
}
});
});
res = unique(res);
return res
};
const toObj = (rules) => {
return rules.reduce((h, a) => {
h[a[0]] = a[1];
return h
}, {})
// console.log(getAll([['laughed', 'laughing']]))
const noDupes = function (rules) {
let already = new Set();
rules = rules.filter(r => {
if (already.has(r.from)) {
return false
}
already.add(r.from);
return true
});
return rules
};
const packRules = function (rules) {
rules = unIndex(rules);
rules = toObj(rules);
rules = pressObj(rules);
rules = efrt.pack(rules);
const cleanup = function (rules) {
// only helpful ones
rules = rules.filter(r => r.yes > 0 && r.yes > r.no);
// one rule per suffix
rules = noDupes(rules);
return rules
};
const compress = function (model = {}) {
model.rules = packRules(model.rules);
// compress exceptions
model.exceptions = pressObj(model.exceptions);
model.exceptions = efrt.pack(model.exceptions);
return model
const getCounts = function (rule, pairs) {
let yes = 0;
let no = 0;
pairs.forEach(pair => {
let [left, right] = pair;
if (!rule.reg.test(left)) {
return
}
// console.log(replace(left, rule.from, rule.to), left.replace(rule.reg, rule.to))
// if (replace(left, rule.from, rule.to) === right) {
if (left.replace(rule.reg, rule.to) === right) {
yes += 1;
} else {
no += 1;
}
});
return { yes, no }
};
const prefix = /^.([0-9]+)/;
const unEncode = function (obj) {
Object.keys(obj).forEach(k => {
let val = obj[k];
let m = val.match(prefix);
if (m !== null) {
let num = Number(m[1]) || 0;
let pre = k.substring(0, num);
let full = pre + val.replace(prefix, '');
obj[k] = full;
const score = function (rules, pairs, opts = {}) {
rules = rules.map(rule => {
let { yes, no } = getCounts(rule, pairs);
rule.yes = yes;
rule.no = no;
delete rule.id;
return rule
});
// worst-to-best
rules = rules.sort((a, b) => {
if (a.yes > b.yes) {
return 1
} else if (a.yes < b.yes) {
return -1
}
return 0
});
return obj
return rules
};
const unpackRules = function (rules) {
if (!rules) {
return {}
}
// un-do our trie compression
rules = efrt.unpack(rules);
// un-do our run-length encoding
rules = unEncode(rules);
// turn into an array
rules = Object.entries(rules);
// ensure they are longest-first order
rules = sortRules(rules);
// index by end-char
rules = indexRules(rules);
const findRules = function (pairs, opts = {}) {
let rules = getAll(pairs);
rules = score(rules, pairs, opts);
rules = cleanup(rules);
return rules
};
const updateRules = function (rules, pairs, opts) {
rules = score(rules, pairs, opts);
rules = cleanup(rules);
return rules
};
const uncompress = function (model = {}) {
if (typeof model.exceptions === 'string') {
model.exceptions = efrt.unpack(model.exceptions);
model.exceptions = unEncode(model.exceptions);
}
if (typeof model.rules === 'string') {
model.rules = unpackRules(model.rules);
}
return model
const trimPairs = function (pairs, rule) {
let { reg, to } = rule;
let done = [];
let remain = pairs.filter(pair => {
let [left, right] = pair;
if (left.match(reg)) {
if (left.replace(reg, to) === right) {
done.push(pair);
return false //done with it
}
}
return true // keep it
});
return { remain, done }
};
// remove any rules that challenge existing pairs
const trimRules = function (rules, pairsDone) {
return rules.filter(r => {
for (let i = 0; i < pairsDone.length; i += 1) {
let pair = pairsDone[i];
if (r.reg.test(pair[0]) && pair[0].replace(r.reg, r.to) !== pair[1]) {
// console.log('banned rule:', r)
return false
}
}
return true
})
};
const reverseObj = function (obj) {
return Object.entries(obj).reduce((h, a) => {
h[a[1]] = a[0];
const learn = function (pairs, opts = {}) {
pairs = validate(pairs, opts);
let rules = findRules(pairs);
let pairsLeft = pairs;
let pairsDone = [];
let chosen = [];
// pick our top rule
while (pairsLeft.length > 0 && rules.length > 0) {
let rule = rules.pop();
chosen.push([rule.from, rule.to]);
// remove now-covered pairs
let res = trimPairs(pairsLeft, rule);
pairsLeft = res.remain;
pairsDone = pairsDone.concat(res.done);
// remove now-unsafe rules
rules = trimRules(rules, pairsDone);
// re-rank our rules
rules = updateRules(rules, pairsLeft, opts);
// logging
if (opts.debug) {
console.log(`\n${rule.from} -> ${rule.to || "''"}`);
console.log(` \x1b[32m +${res.done.length.toLocaleString()} pairs\x1b[0m`);
console.log(' ', pairsLeft.length, 'remaining');
console.log(' ', rules.length, 'rules left');
}
}
// turn em upside-down
chosen = chosen.reverse();
// remaining pairs are exceptions
let exceptions = pairsLeft.reduce((h, a) => {
h[a[0]] = a[1];
return h
}, {})
};
}, {});
const reverseArr = function (arr) {
return arr.map(a => [a[1], a[0]])
};
const reverse = function (model) {
let allRules = [];
Object.keys(model.rules).forEach(k => {
allRules = allRules.concat(reverseArr(model.rules[k]));
});
allRules = sortRules(allRules);
let rules = indexRules(allRules);
let exceptions = reverseObj(model.exceptions);
return {
rules,
rules: chosen,
exceptions

@@ -523,34 +597,30 @@ }

// get suffix-rules according to last char of word
const getRules = function (word, model) {
let char = word[word.length - 1];
let rules = model.rules[char] || [];
if (rules.length === 0) {
// do we have a generic suffix?
rules = model.rules[''] || rules;
}
return rules
const mergeExceptions = function (fwd, bkwd) {
Object.entries(bkwd).forEach(b => {
fwd[b[1]] = b[0]; //reverse
});
return fwd
};
const debug = function (word, model) {
if (model.exceptions.hasOwnProperty(word)) {
let obj = {};
obj[word] = model.exceptions[word];
return { found: 'exception', exception: obj }
const learnBoth = function (pairs, opts = {}) {
let fwd = learn(pairs, opts);
// learn backward too?
if (opts.reverse !== false) {
pairs = pairs.map(a => [a[1], a[0]]);
let bkwd = learn(pairs, opts);
// merge exceptions
fwd.exceptions = mergeExceptions(fwd.exceptions, bkwd.exceptions);
// add rules
fwd.rev = indexRules(bkwd.rules);
}
const rules = getRules(word, model);
for (let i = 0; i < rules.length; i += 1) {
let suffix = rules[i][0];
if (word.endsWith(suffix)) {
return { found: 'rule', rule: rules[i] }
}
}
return { found: null }
fwd.rules = indexRules(fwd.rules);
return fwd
};
exports.classify = classify;
exports.compress = compress;
exports.convert = convert;
exports.debug = debug;
exports.learn = learn;
exports.learn = learnBoth;
exports.reverse = reverse;
exports.test = test;
exports.uncompress = uncompress;

@@ -557,0 +627,0 @@ exports.validate = validate;

{
"name": "suffix-thumb",
"description": "learn transformations between two sets of words",
"version": "3.1.5",
"version": "4.0.0",
"author": "Spencer Kelly <spencermountain@gmail.com> (http://spencermounta.in)",
"main": "./src/index.js",
"unpkg": "./builds/suffix-thumb-client.js",
"types": "types/index.d.ts",
"types": "/builds/types.d.ts",
"type": "module",

@@ -41,16 +41,14 @@ "sideEffects": false,

],
"dependencies": {
"efrt": "2.5.0"
},
"dependencies": {},
"devDependencies": {
"@rollup/plugin-commonjs": "21.0.1",
"@rollup/plugin-node-resolve": "13.1.2",
"@rollup/plugin-node-resolve": "13.1.3",
"amble": "1.3.0",
"rollup": "2.63.0",
"rollup": "2.67.2",
"rollup-plugin-filesize-check": "0.0.1",
"rollup-plugin-terser": "7.0.2",
"tap-dancer": "0.3.4",
"tape": "5.4.0"
"tape": "5.5.2"
},
"license": "MIT"
}

@@ -83,3 +83,3 @@ <div align="center">

```js
learn(pairs, {inverse: false})
learn(pairs, {reverse: false})
```

@@ -137,3 +137,3 @@ you can expect the model to be 5% smaller or so - not much.

]
let model = learn(pairs, {inverse: false})
let model = learn(pairs, {reverse: false})
let out = convert('ok', model)

@@ -143,2 +143,21 @@ // 'right'

### Classify
the model can also be used to classify whether a given word belongs to either Left or Right sides.
```js
import { learn, classify } from 'suffix-thumb'
let pairs = [
['walk', 'walked'],
['talk', 'talked'],
['go', 'went'],
]
let model = learn(pairs)
let out = classify('stalked', model)
// 'Right'
out = classify('waited', model)
// null
```
Unlike convert, the classifier is not guarnteed to return 100% on the training data.
The classifier will generally hit high-90s on the given dataset, but how-well it generalizes to novel input is up-to the dataset.
<!-- spacer -->

@@ -163,8 +182,2 @@ <img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>

if you find an issue, you can use debug():
```js
import { debug } from 'suffix-thumb'
let out = debug('walk', model)
// --which rule/exception was triggered--
```
<!-- spacer -->

@@ -175,3 +188,3 @@ <img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>

### See also
* [efrt](https://github.com/spencermountain/efrt) - trie-based JSON compression
* [efrt](https://github.com/spencermountain/efrt) - trie-based prefix compression for JSON

@@ -178,0 +191,0 @@ <!-- spacer -->

import convert from './convert/index.js'
import validate from './learn/validate.js'
import uncompress from './uncompress/index.js'
import validate from './validate/index.js'
import uncompress from './compress/uncompress.js'
import reverse from './reverse/index.js'
import classify from './classify/index.js'
export { convert, uncompress, reverse, validate }
export { convert, uncompress, reverse, validate, classify }

@@ -1,27 +0,29 @@

import { pack } from 'efrt'
import pressObj from './pressObj.js'
import press from './press.js'
import { unIndex } from '../_lib.js'
const toObj = (rules) => {
return rules.reduce((h, a) => {
h[a[0]] = a[1]
return h
}, {})
// remove shared data in key-val pairs
// uses an ad-hoc run-length encoding format
// {walk: walking} -> {walk: '.4ing'}
const pressPairs = function (pairs) {
pairs = pairs.map(a => {
return press(a[0], a[1]).join('|')
})
return pairs.join(',')
}
const packRules = function (rules) {
rules = unIndex(rules)
rules = toObj(rules)
rules = pressObj(rules)
rules = pack(rules)
return rules
}
const compress = function (model = {}) {
model = Object.assign({}, model)
const compress = function (model = {}) {
model.rules = packRules(model.rules)
// compress fwd rules
model.rules = unIndex(model.rules)
model.rules = pressPairs(model.rules)
// compress reverse rules
model.rev = unIndex(model.rev)
model.rev = pressPairs(model.rev)
// compress exceptions
model.exceptions = pressObj(model.exceptions)
model.exceptions = pack(model.exceptions)
model.exceptions = Object.entries(model.exceptions)
model.exceptions = pressPairs(model.exceptions)
return model
}
export default compress

@@ -18,22 +18,33 @@ const prefix = /^.([0-9]+)/

// get suffix-rules according to last char of word
const getRules = function (word, model) {
const getRules = function (word, rules = {}) {
let char = word[word.length - 1]
let rules = model.rules[char] || []
if (rules.length === 0) {
// do we have a generic suffix?
rules = model.rules[''] || rules
let list = rules[char] || []
// do we have a generic suffix?
if (rules['']) {
list = list.concat(rules[''])
}
return rules
return list
}
const convert = function (word, model) {
const convert = function (word, model, debug) {
// check list of irregulars
if (model.exceptions.hasOwnProperty(word)) {
if (debug) {
console.log("exception, ", word, model.exceptions[word])
}
return getKeyVal(word, model)
}
// if model is reversed, try rev rules
let rules = model.rules
if (model.reversed) {
rules = model.rev
}
// try suffix rules
const rules = getRules(word, model)
rules = getRules(word, rules)
for (let i = 0; i < rules.length; i += 1) {
let suffix = rules[i][0]
if (word.endsWith(suffix)) {
if (debug) {
console.log("rule, ", rules[i])
}
let reg = new RegExp(suffix + '$')

@@ -43,2 +54,5 @@ return word.replace(reg, rules[i][1])

}
if (debug) {
console.log(' x - ' + word)
}
// return the original word unchanged

@@ -45,0 +59,0 @@ return word

import convert from './convert/index.js'
import learn from './learn/index.js'
import validate from './learn/validate.js'
import compress from './compress/index.js'
import uncompress from './uncompress/index.js'
import uncompress from './compress/uncompress.js'
import reverse from './reverse/index.js'
import debug from './convert/debug.js'
import test from './test/index.js'
import learn from './learn/index.js'
import validate from './validate/index.js'
import classify from './classify/index.js'
export { learn, convert, compress, uncompress, reverse, validate, debug }
export { learn, convert, compress, uncompress, reverse, validate, test, classify }

@@ -1,17 +0,25 @@

import firstPass from './1st-pass/index.js'
import secondPass from './2nd-pass/index.js'
import learn from './learn.js'
import { indexRules } from '../_lib.js'
import validate from './validate.js'
const learn = function (pairs, opts = {}) {
// ensure input pairs are possible
pairs = validate(pairs, opts)
// create basic {rules, exceptions}
let res = firstPass(pairs)
// optimize it further
res = secondPass(res, pairs, opts)
// organize rules by their suffix char
res.rules = indexRules(res.rules)
return res
const mergeExceptions = function (fwd, bkwd) {
Object.entries(bkwd).forEach(b => {
fwd[b[1]] = b[0] //reverse
})
return fwd
}
export default learn
const learnBoth = function (pairs, opts = {}) {
let fwd = learn(pairs, opts)
// learn backward too?
if (opts.reverse !== false) {
pairs = pairs.map(a => [a[1], a[0]])
let bkwd = learn(pairs, opts)
// merge exceptions
fwd.exceptions = mergeExceptions(fwd.exceptions, bkwd.exceptions)
// add rules
fwd.rev = indexRules(bkwd.rules)
}
fwd.rules = indexRules(fwd.rules)
return fwd
}
export default learnBoth

@@ -1,3 +0,1 @@

import { sortRules, indexRules } from '../_lib.js'
const reverseObj = function (obj) {

@@ -10,19 +8,12 @@ return Object.entries(obj).reduce((h, a) => {

const reverseArr = function (arr) {
return arr.map(a => [a[1], a[0]])
}
const reverse = function (model) {
let allRules = []
Object.keys(model.rules).forEach(k => {
allRules = allRules.concat(reverseArr(model.rules[k]))
})
allRules = sortRules(allRules)
let rules = indexRules(allRules)
let exceptions = reverseObj(model.exceptions)
let { rules, exceptions, rev } = model
exceptions = reverseObj(exceptions)
return {
reversed: !Boolean(model.reversed),//toggle this
rules,
exceptions
exceptions,
rev
}
}
export default reverse

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc