suffix-thumb - npm Package Compare versions

Comparing version 3.0.0 to 3.1.0

builds/suffix-thumb-client.js

@@ -1,1 +1,1 @@

import{unpack as e}from"efrt";const t=/^.([0-9]+)/,r=function(e,t){if(r=t.rules,"[object Array]"===Object.prototype.toString.call(r))return t.rules;var r;let n=e[e.length-1],o=t.rules[n]||[];return 0===o.length&&(o=t.rules[""]||o),o},n=function(e,n){if(n.exceptions.hasOwnProperty(e))return function(e,r){let n=r.exceptions[e],o=n.match(t);if(null===o)return r.exceptions[e];let l=Number(o[1])||0;return e.substr(0,l)+n.replace(t,"")}(e,n);const o=r(e,n);for(let t=0;t<o.length;t+=1){let r=o[t][0];if(e.endsWith(r)){let n=new RegExp(r+"$");return e.replace(n,o[t][1])}}return e},o=function(e,t={}){let r={},n={};return e=e.filter((e=>void 0!==r[e[0]]?(t.verbose&&(console.warn("Duplicate left side:"),console.log(" 1.",[e[0],r[e[0]]]),console.log(" 2.",e)),!1):void 0!==n[e[1]]?(t.verbose&&(console.warn("Duplicate right side:"),console.log(" 1.",[n[e[1]],e[1]]),console.log(" 2.",e)),!1===t.inverse):(r[e[0]]=e[1],n[e[1]]=e[0],!0)))},l=function(e){let t={};return e.forEach((e=>{let r=e[0]||"",n=r[r.length-1]||"";t[n]=t[n]||[],t[n].push(e)})),t},c=function(e){return e=e.sort(((e,t)=>e[0].length>t[0].length?-1:e[0].length<t[0].length?1:0))},s=/^.([0-9]+)/,u=function(e){return Object.keys(e).forEach((t=>{let r=e[t],n=r.match(s);if(null!==n){let o=Number(n[1])||0,l=t.substring(0,o)+r.replace(s,"");e[t]=l}})),e},i=function(t={}){var r;return"string"==typeof t.exceptions&&(t.exceptions=e(t.exceptions),t.exceptions=u(t.exceptions)),"string"==typeof t.rules&&(t.rules=(r=t.rules,r=e(r),r=u(r),r=Object.entries(r),r=c(r),l(r))),t},p=function(e){let t=[];var r;return Object.keys(e.rules).forEach((r=>{t=t.concat(e.rules[r].map((e=>[e[1],e[0]])))})),t=c(t),{rules:l(t),exceptions:(r=e.exceptions,Object.entries(r).reduce(((e,t)=>(e[t[1]]=t[0],e)),{}))}};export{n as convert,p as reverse,i as uncompress,o as validate};
import{unpack as e}from"efrt";const t=/^.([0-9]+)/,n=function(e,n){if(n.exceptions.hasOwnProperty(e))return function(e,n){let r=n.exceptions[e],o=r.match(t);if(null===o)return n.exceptions[e];let l=Number(o[1])||0;return e.substr(0,l)+r.replace(t,"")}(e,n);const r=function(e,t){let n=e[e.length-1],r=t.rules[n]||[];return 0===r.length&&(r=t.rules[""]||r),r}(e,n);for(let t=0;t<r.length;t+=1){let n=r[t][0];if(e.endsWith(n)){let o=new RegExp(n+"$");return e.replace(o,r[t][1])}}return e},r=function(e,t={}){let n={},r={};return e=e.filter((e=>void 0!==n[e[0]]?(t.verbose&&(console.warn("Duplicate left side:"),console.log(" 1.",[e[0],n[e[0]]]),console.log(" 2.",e)),!1):void 0!==r[e[1]]?(t.verbose&&(console.warn("Duplicate right side:"),console.log(" 1.",[r[e[1]],e[1]]),console.log(" 2.",e)),!1===t.inverse):(n[e[0]]=e[1],r[e[1]]=e[0],!0)))},o=function(e){let t={};return e.forEach((e=>{let n=e[0]||"",r=n[n.length-1]||"";t[r]=t[r]||[],t[r].push(e)})),t},l=function(e){return e=e.sort(((e,t)=>e[0].length>t[0].length?-1:e[0].length<t[0].length?1:0))},s=/^.([0-9]+)/,c=function(e){return Object.keys(e).forEach((t=>{let n=e[t],r=n.match(s);if(null!==r){let o=Number(r[1])||0,l=t.substring(0,o)+n.replace(s,"");e[t]=l}})),e},i=function(t={}){var n;return"string"==typeof t.exceptions&&(t.exceptions=e(t.exceptions),t.exceptions=c(t.exceptions)),"string"==typeof t.rules&&(t.rules=(n=t.rules)?(n=e(n),n=c(n),n=Object.entries(n),n=l(n),n=o(n)):{}),t},u=function(e){let t=[];var n;return Object.keys(e.rules).forEach((n=>{t=t.concat(e.rules[n].map((e=>[e[1],e[0]])))})),t=l(t),{rules:o(t),exceptions:(n=e.exceptions,Object.entries(n).reduce(((e,t)=>(e[t[1]]=t[0],e)),{}))}};export{n as convert,u as reverse,i as uncompress,r as validate};

@@ -1,1 +1,562 @@

const e=/^.([0-9]+)/,t=function(e,t){if(r=t.rules,"[object Array]"===Object.prototype.toString.call(r))return t.rules;var r;let n=e[e.length-1],l=t.rules[n]||[];return 0===l.length&&(l=t.rules[""]||l),l},r=function(r,n){if(n.exceptions.hasOwnProperty(r))return function(t,r){let n=r.exceptions[t],l=n.match(e);if(null===l)return r.exceptions[t];let u=Number(l[1])||0;return t.substr(0,u)+n.replace(e,"")}(r,n);const l=t(r,n);for(let e=0;e<l.length;e+=1){let t=l[e][0];if(r.endsWith(t)){let n=new RegExp(t+"$");return r.replace(n,l[e][1])}}return r};export{r as default};
/* suffix-thumb 3.1.0 MIT */
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('efrt')) :
typeof define === 'function' && define.amd ? define(['exports', 'efrt'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.suffixThumb = {}, global.efrt));
})(this, (function (exports, efrt) { 'use strict';
const prefix$1 = /^.([0-9]+)/;
// handle compressed form of key-value pair
const getKeyVal = function (word, model) {
let val = model.exceptions[word];
let m = val.match(prefix$1);
if (m === null) {
// return not compressed form
return model.exceptions[word]
}
// uncompress it
let num = Number(m[1]) || 0;
let pre = word.substr(0, num);
return pre + val.replace(prefix$1, '')
};
// get suffix-rules according to last char of word
const getRules$1 = function (word, model) {
let char = word[word.length - 1];
let rules = model.rules[char] || [];
if (rules.length === 0) {
// do we have a generic suffix?
rules = model.rules[''] || rules;
}
return rules
};
const convert = function (word, model) {
// check list of irregulars
if (model.exceptions.hasOwnProperty(word)) {
return getKeyVal(word, model)
}
// try suffix rules
const rules = getRules$1(word, model);
for (let i = 0; i < rules.length; i += 1) {
let suffix = rules[i][0];
if (word.endsWith(suffix)) {
let reg = new RegExp(suffix + '$');
return word.replace(reg, rules[i][1])
}
}
// return the original word unchanged
return word
};
const max = 6;
const getSuffixes = function (str = '') {
let list = [];
for (let i = max; i >= 0; i -= 1) {
if (str.length - 1 <= i) {
continue
}
let n = str.length - i - 1;
let suffix = str.substring(n, n + str.length - 1);
list.push(suffix);
}
return list
};
const getAll = function (arr) {
const suffixes = {};
arr.forEach((a) => {
let [from, to] = a;
let fromList = getSuffixes(from);
fromList.push(''); //add a prepend-only option
fromList.forEach((left) => {
suffixes[left] = suffixes[left] || {};
let toList = getSuffixes(to);
toList.forEach((right) => {
suffixes[left][right] = suffixes[left][right] || 0;
suffixes[left][right] += 1;
});
});
});
return suffixes
};
const topChange = function (obj, from) {
let keys = Object.keys(obj);
let arr = keys.map((to) => {
return {
from: from,
to: to,
yes: obj[to],
}
});
arr = arr.sort((a, b) => {
if (a.yes > b.yes) {
return -1
} else if (a.yes < b.yes) {
return 1
}
return 0
});
return arr
};
const findBest = function (suffixes) {
let good = [];
Object.keys(suffixes).forEach((left) => {
let top = topChange(suffixes[left], left);
if (top[0] && top[0].yes > 1) {
good.push(top[0]);
}
});
good = good.sort((a, b) => {
if (a.yes > b.yes) {
return -1
} else if (a.yes < b.yes) {
return 1
}
return 0
});
return good
};
const getScores = function (arr, pairs) {
return arr.map((obj) => {
let yes = 0;
let no = 0;
let exceptions = {};
pairs.forEach((pair) => {
if (pair[0].endsWith(obj.from)) {
let reg = new RegExp(obj.from + '$');//unsafe
let have = pair[0].replace(reg, obj.to);
if (have === pair[1]) {
yes += 1;
} else {
no += 1;
exceptions[pair[0]] = pair[1];
}
}
});
return {
from: obj.from,
to: obj.to,
yes: yes,
no: no,
percent: yes / (yes + no),
exceptions: exceptions,
}
})
};
const rank = function (arr, pairs) {
let scored = getScores(arr, pairs);
// baseline filter
scored = scored.filter((o) => {
return o.yes > 1 && o.yes > o.no
});
// sort by # of positive
scored = scored.sort((a, b) => {
if (a.yes > b.yes) {
return -1
} else if (a.yes < b.yes) {
return 1
}
return 0
});
return scored
};
// remove any redundant rules
const squeeze = function (arr) {
let redundant = {};
arr.forEach((o, i) => {
let downstream = arr.slice(i + 1, arr.length);
downstream.forEach((d) => {
if (d.from.endsWith(o.from)) {
// also ensure the surviving one has no exceptions
if (d.no === 0) {
redundant[d.from] = true;
}
}
});
});
// actually remove any redundant suffixes
arr = arr.filter((o) => {
return redundant.hasOwnProperty(o.from) === false
});
return arr
};
function reverse$1(str) {
return str.split('').reverse().join('')
}
const fmtRules = function (rules) {
// sort by length, then by suffix
rules = rules.sort((a, b) => {
if (a.from.length > b.from.length) {
return -1
} else if (a.from.length < b.from.length) {
return 1
}
a = reverse$1(a.from);
b = reverse$1(b.from);
if (a > b) {
return 1
} else if (a < b) {
return -1
}
return 0
});
return rules.map((o) => [o.from, o.to])
};
const format = function (rules, pairs) {
let exceptions = {};
rules.forEach((rule) => {
Object.assign(exceptions, rule.exceptions);
});
rules = fmtRules(rules);
// find remaining pairs with no rule
let remaining = pairs.filter((pair) => {
if (exceptions.hasOwnProperty(pair[0])) {
return false
}
// console.log(rules.find((rule) => pair[0].endsWith(rule.from)))
if (rules.find((rule) => pair[0].endsWith(rule.from))) {
return false
}
return true
});
// add them to exceptions list
remaining.forEach(a => {
exceptions[a[0]] = a[1];
});
return {
rules,
exceptions: exceptions,
}
};
const firstPass = function (pairs) {
pairs = pairs.filter((a) => a && a[0] && a[1]);
// look at all patterns
const suffixes = getAll(pairs);
// look for the greatest patterns
let best = findBest(suffixes);
// run pattern against the pairs
let rules = rank(best, pairs);
// console.log(rules)
// remove duplicates
rules = squeeze(rules);
// nice result format
let res = format(rules, pairs);
// console.log(res)
return res
};
const reduceExceptions = function (res) {
let final = {};
let { rules, exceptions } = res;
Object.keys(exceptions).forEach(k => {
let found = rules.find((rule) => {
return k.endsWith(rule[0])
});
// no rule applies
if (!found) {
final[k] = exceptions[k];
return
}
let tmp = k.replace(found[0], found[1]);
// did we do it wrong?
if (tmp !== exceptions[k]) {
final[k] = exceptions[k]; //still an exception then
}
});
return final
};
const postProcess = function (res) {
// some exceptions are not anymore
res.exceptions = reduceExceptions(res);
return res
};
// index rules by last-char
const indexRules = function (rules) {
let byChar = {};
rules.forEach((a) => {
let suff = a[0] || '';
let char = suff[suff.length - 1] || '';
byChar[char] = byChar[char] || [];
byChar[char].push(a);
});
return byChar
};
const unIndex = function (byChar) {
let arr = [];
Object.keys(byChar).forEach(k => {
arr = arr.concat(byChar[k]);
});
return arr
};
const sortRules = function (rules) {
rules = rules.sort((a, b) => {
if (a[0].length > b[0].length) {
return -1
} else if (a[0].length < b[0].length) {
return 1
}
return 0
});
return rules
};
// add all reverse-exceptions
const addInverse = function (model, pairs) {
// create a reverse model
let tmp = Object.assign({}, model);
tmp.rules = indexRules(model.rules);
let rev = reverse(tmp);
// look for exceptions
pairs.forEach(a => {
let [left, right] = a;
if (convert(right, rev) !== left) {
// console.log(a)
model.exceptions[a[0]] = a[1];
}
});
// console.log(convert('relearn', rev))
return model
};
const secondPass = function (res, pairs, opts) {
// remove redundant exceptions
res = postProcess(res);
// turn some exceptions into singleton suffix-rules
// res = toRules(res, pairs)
if (opts.inverse !== false) {
res = addInverse(res, pairs);
}
return res
};
// make sure inputs are not impossible to square-up
const validate = function (pairs, opts = {}) {
let left = {};
let right = {};
pairs = pairs.filter(a => {
if (left[a[0]] !== undefined) {
if (opts.verbose) {
console.warn('Duplicate left side:');
console.log(' 1.', [a[0], left[a[0]]]);
console.log(' 2.', a);
}
return false
}
if (right[a[1]] !== undefined) {
if (opts.verbose) {
console.warn('Duplicate right side:');
console.log(' 1.', [right[a[1]], a[1]]);
console.log(' 2.', a);
}
if (opts.inverse === false) {
return true //allow it
}
return false
}
left[a[0]] = a[1];
right[a[1]] = a[0];
return true
});
return pairs
};
const learn = function (pairs, opts = {}) {
// ensure input pairs are possible
pairs = validate(pairs, opts);
// create basic {rules, exceptions}
let res = firstPass(pairs);
// optimize it further
res = secondPass(res, pairs, opts);
// organize rules by their suffix char
res.rules = indexRules(res.rules);
return res
};
// longest common prefix
const findOverlap = (from, to) => {
let all = [];
for (let i = 0; i < from.length; i += 1) {
if (from[i] === to[i]) {
all.push(from[i]);
} else {
break
}
}
return all.join('')
};
// remove shared data in key-val pairs
// uses an ad-hoc run-length encoding format
// {walk: walking} -> {walk: '.4ing'}
const pressObj = function (obj) {
let res = {};
Object.keys(obj).forEach((k) => {
let val = obj[k];
let prefix = findOverlap(k, val);
if (prefix.length < 2) {
res[k] = val;
return
}
let out = '.' + prefix.length + val.substr(prefix.length);
res[k] = out;
});
return res
};
const toObj = (rules) => {
return rules.reduce((h, a) => {
h[a[0]] = a[1];
return h
}, {})
};
const packRules = function (rules) {
rules = unIndex(rules);
rules = toObj(rules);
rules = pressObj(rules);
rules = efrt.pack(rules);
return rules
};
const compress = function (model = {}) {
model.rules = packRules(model.rules);
// compress exceptions
model.exceptions = pressObj(model.exceptions);
model.exceptions = efrt.pack(model.exceptions);
return model
};
const prefix = /^.([0-9]+)/;
const unEncode = function (obj) {
Object.keys(obj).forEach(k => {
let val = obj[k];
let m = val.match(prefix);
if (m !== null) {
let num = Number(m[1]) || 0;
let pre = k.substring(0, num);
let full = pre + val.replace(prefix, '');
obj[k] = full;
}
});
return obj
};
const unpackRules = function (rules) {
if (!rules) {
return {}
}
// un-do our trie compression
rules = efrt.unpack(rules);
// un-do our run-length encoding
rules = unEncode(rules);
// turn into an array
rules = Object.entries(rules);
// ensure they are longest-first order
rules = sortRules(rules);
// index by end-char
rules = indexRules(rules);
return rules
};
const uncompress = function (model = {}) {
if (typeof model.exceptions === 'string') {
model.exceptions = efrt.unpack(model.exceptions);
model.exceptions = unEncode(model.exceptions);
}
if (typeof model.rules === 'string') {
model.rules = unpackRules(model.rules);
}
return model
};
const reverseObj = function (obj) {
return Object.entries(obj).reduce((h, a) => {
h[a[1]] = a[0];
return h
}, {})
};
const reverseArr = function (arr) {
return arr.map(a => [a[1], a[0]])
};
const reverse = function (model) {
let allRules = [];
Object.keys(model.rules).forEach(k => {
allRules = allRules.concat(reverseArr(model.rules[k]));
});
allRules = sortRules(allRules);
let rules = indexRules(allRules);
let exceptions = reverseObj(model.exceptions);
return {
rules,
exceptions
}
};
// get suffix-rules according to last char of word
const getRules = function (word, model) {
let char = word[word.length - 1];
let rules = model.rules[char] || [];
if (rules.length === 0) {
// do we have a generic suffix?
rules = model.rules[''] || rules;
}
return rules
};
const debug = function (word, model) {
if (model.exceptions.hasOwnProperty(word)) {
let obj = {};
obj[word] = model.exceptions[word];
return { found: 'exception', exception: obj }
}
const rules = getRules(word, model);
for (let i = 0; i < rules.length; i += 1) {
let suffix = rules[i][0];
if (word.endsWith(suffix)) {
return { found: 'rule', rule: rules[i] }
}
}
return { found: null }
};
Object.defineProperty(exports, 'pack', {
enumerable: true,
get: function () { return efrt.pack; }
});
Object.defineProperty(exports, 'unpack', {
enumerable: true,
get: function () { return efrt.unpack; }
});
exports.compress = compress;
exports.convert = convert;
exports.debug = debug;
exports.learn = learn;
exports.reverse = reverse;
exports.uncompress = uncompress;
exports.validate = validate;
Object.defineProperty(exports, '__esModule', { value: true });
}));
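
For orientation, the new client file above is a UMD bundle: under CommonJS it calls `require('efrt')`, under AMD it declares `['exports', 'efrt']`, and in a plain browser it attaches its exports to `globalThis.suffixThumb` while reading `efrt` from a global. A minimal sketch of using that browser global — it assumes both efrt's browser build and this file have already been loaded via `<script>` tags, and the sample pairs are illustrative:

```js
// assumes <script> tags for efrt and suffix-thumb-client.js have already run,
// so the UMD wrapper above has attached the library to globalThis.suffixThumb
const { learn, convert } = globalThis.suffixThumb

const model = learn([
  ['walk', 'walked'],
  ['talk', 'talked'],
  ['go', 'went'], // irregular pairs end up in model.exceptions
])
console.log(convert('walk', model)) // 'walked'
console.log(convert('go', model))   // 'went'
```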

package.json

{
  "name": "suffix-thumb",
  "description": "learn transformations between two sets of words",
-  "version": "3.0.0",
+  "version": "3.1.0",
  "author": "Spencer Kelly <spencermountain@gmail.com> (http://spencermounta.in)",
  "main": "./builds/suffix-thumb.mjs",
-  "unpkg": "./builds/suffix-thumb.min.js",
+  "unpkg": "./builds/suffix-thumb-client.js",
  "module": "./builds/suffix-thumb.mjs",

@@ -14,3 +14,3 @@ "types": "types/index.d.ts",

  "import": "./builds/suffix-thumb.mjs",
-  "require": "./builds/suffix-thumb.cjs"
+  "require": "./builds/suffix-thumb.js"
  },

@@ -53,6 +53,6 @@ "./learn": {

  "@rollup/plugin-commonjs": "21.0.1",
-  "@rollup/plugin-node-resolve": "13.1.1",
+  "@rollup/plugin-node-resolve": "13.1.2",
  "amble": "1.3.0",
  "efrt": "^2.3.2",
-  "rollup": "2.62.0",
+  "rollup": "2.63.0",
  "rollup-plugin-filesize-check": "0.0.1",

@@ -64,2 +64,2 @@ "rollup-plugin-terser": "7.0.2",

  "license": "MIT"
  }
}
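
Two entry-point changes stand out in this diff: `unpkg` now serves the new un-minified client build, and the `require` condition of the exports map points at `./builds/suffix-thumb.js` instead of the old `.cjs` file, while `import` still resolves to the `.mjs` build. A quick sketch of what each consumer style picks up (only the fields visible in the diff are assumed):

```js
// ESM – resolves the "import" condition → ./builds/suffix-thumb.mjs
import { convert } from 'suffix-thumb'

// CommonJS – resolves the "require" condition,
// which 3.1.0 maps to ./builds/suffix-thumb.js (3.0.0 used ./builds/suffix-thumb.cjs)
// const { convert } = require('suffix-thumb')
```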

README.md

<div align="center">
<!-- spacer -->
<img height="15px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
<img src="https://cloud.githubusercontent.com/assets/399657/23590290/ede73772-01aa-11e7-8915-181ef21027bc.png" />

@@ -26,14 +30,18 @@

-`suffix-thumb` tries to discover the way two sets of words map to one another, according to changes in their suffix.
+`suffix-thumb` discovers the minimal rules for mapping two sets of words to one another, according to changes in their suffix.
-It was built to learn rules about verb conjugations, but in a way, it is just a generic compression algorithm.
+It was built for learning rules about verb conjugations, but in a way, it is just a generic compression algorithm.
-The assumption is that a word's _suffix_ is the most-changed part of a word.
+The assumption is that a word's _suffix_ is the most-often changed part of a word.
<!-- ![carbon(1)](https://user-images.githubusercontent.com/399657/79898840-e7e66780-83d9-11ea-9ff3-099bf39cf892.png) -->
### Learn → Convert
![preview](https://user-images.githubusercontent.com/399657/147783157-f8bdf781-0925-4af3-9fdc-beb84073803e.png)
<!-- spacer -->
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
### Learn → Convert
```js

@@ -53,18 +61,4 @@ import { learn, convert } from 'suffix-thumb'

let pairs = [
['aail', 'aael'],
['bbil', 'bbel'],
['cil', 'cel'],
['snafoo', 'snabar'],
['poofoo', 'poobar'],
]
let model = learn(pairs)
/* {
rules: { o: [ [ 'foo', 'bar' ] ], l: [ [ 'il', 'el' ] ] },
exceptions: {},
}
*/
let out = convert('snafoo', model)
// 'snabar'
let out = convert('walk', model)
// 'walked'
```
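
The client build above also exports `compress` and `uncompress` (built on efrt's `pack`/`unpack`), which turn a learned model's rules and exceptions into compact packed strings and back. A small round-trip sketch reusing the README's sample pairs, assuming the root entry exposes the same exports as the client build:

```js
import { learn, compress, uncompress, convert } from 'suffix-thumb'

let pairs = [
  ['aail', 'aael'],
  ['bbil', 'bbel'],
  ['cil', 'cel'],
  ['snafoo', 'snabar'],
  ['poofoo', 'poobar'],
]
let model = learn(pairs)

// shrink the model: rules + exceptions become efrt-packed strings
let packed = compress(model)
let saved = JSON.stringify(packed)

// later: restore the packed model and convert as usual
let restored = uncompress(JSON.parse(saved))
console.log(convert('snafoo', restored)) // 'snabar'
```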

@@ -168,8 +162,19 @@ <!-- spacer -->

if you find an issue, you can use debug():
```js
import { debug } from 'suffix-thumb'
let out = debug('walk', model)
// --which rule/exception was triggered--
```
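
Per the `debug` implementation in the client build above, the return value tells you which branch handled the word. A short sketch, reusing the synthetic pairs and the learned model shown earlier in this README:

```js
import { learn, debug } from 'suffix-thumb'

let model = learn([
  ['aail', 'aael'],
  ['bbil', 'bbel'],
  ['cil', 'cel'],
  ['snafoo', 'snabar'],
  ['poofoo', 'poobar'],
])

console.log(debug('cil', model))
// { found: 'rule', rule: [ 'il', 'el' ] }   – a suffix rule matched
// other possible shapes, per the implementation above:
//   { found: 'exception', exception: { word: result } }  – the word is an irregular
//   { found: null }                                       – no rule or exception applies
```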
<!-- spacer -->
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
### See also
* [efrt](https://github.com/spencermountain/efrt) - trie-based JSON compression
<!-- spacer -->
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
MIT

