suffix-thumb
Advanced tools
Comparing version 0.1.0 to 1.0.0
@@ -1,8 +0,27 @@ | ||
/* suffix-thumb 0.1.0 MIT */ | ||
/* suffix-thumb 1.0.0 MIT */ | ||
(function (global, factory) { | ||
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() : | ||
typeof define === 'function' && define.amd ? define(factory) : | ||
(global = global || self, global.suffixThumb = factory()); | ||
}(this, (function () { 'use strict'; | ||
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : | ||
typeof define === 'function' && define.amd ? define(['exports'], factory) : | ||
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.suffixThumb = {})); | ||
}(this, (function (exports) { 'use strict'; | ||
/**
 * Transform a word using a model of suffix rules and irregular exceptions.
 *
 * @param {string} word - the word to transform
 * @param {{exceptions: Object, rules: Array}} model - `exceptions` maps a
 *   whole word to its result; `rules` is an ordered list of
 *   `[suffix, replacement]` pairs
 * @returns {string|null} the transformed word, or null when no exception
 *   and no rule applies
 */
var convert = function convert(word, model) {
  // check list of irregulars
  // (borrow hasOwnProperty so prototype-less dictionaries also work)
  if (Object.prototype.hasOwnProperty.call(model.exceptions, word)) {
    return model.exceptions[word];
  }
  // try suffix rules - the first rule whose suffix matches wins
  for (var i = 0; i < model.rules.length; i += 1) {
    var suffix = model.rules[i][0];
    if (word.endsWith(suffix)) {
      // endsWith() already proved the literal match, so cut the suffix off
      // by length. Building a RegExp from the suffix would treat characters
      // like '+', '.', '(' as regex syntax, and String.replace would expand
      // '$&' / '$1' patterns found in the replacement text.
      return word.slice(0, word.length - suffix.length) + model.rules[i][1];
    }
  }
  return null;
};
function _slicedToArray(arr, i) { | ||
@@ -17,10 +36,13 @@ return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _unsupportedIterableToArray(arr, i) || _nonIterableRest(); | ||
function _iterableToArrayLimit(arr, i) { | ||
if (typeof Symbol === "undefined" || !(Symbol.iterator in Object(arr))) return; | ||
var _i = arr == null ? null : typeof Symbol !== "undefined" && arr[Symbol.iterator] || arr["@@iterator"]; | ||
if (_i == null) return; | ||
var _arr = []; | ||
var _n = true; | ||
var _d = false; | ||
var _e = undefined; | ||
var _s, _e; | ||
try { | ||
for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { | ||
for (_i = _i.call(arr); !(_n = (_s = _i.next()).done); _n = true) { | ||
_arr.push(_s.value); | ||
@@ -49,3 +71,3 @@ | ||
if (n === "Object" && o.constructor) n = o.constructor.name; | ||
if (n === "Map" || n === "Set") return Array.from(n); | ||
if (n === "Map" || n === "Set") return Array.from(o); | ||
if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); | ||
@@ -66,3 +88,4 @@ } | ||
var getSuffixes = function getSuffixes(str) { | ||
var getSuffixes = function getSuffixes() { | ||
var str = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; | ||
var list = []; | ||
@@ -104,4 +127,2 @@ | ||
var _01GetAll = getAll; | ||
var topChange = function topChange(obj, from) { | ||
@@ -149,4 +170,2 @@ var keys = Object.keys(obj); | ||
var _02FindBest = findBest; | ||
var getScores = function getScores(arr, pairs) { | ||
@@ -198,4 +217,2 @@ return arr.map(function (obj) { | ||
var _03Rank = rank; | ||
var compress = function compress(arr) { | ||
@@ -208,3 +225,2 @@ var redundant = {}; // remove any redundant downstream | ||
if (d.from.endsWith(o.from)) { | ||
// console.log(o.from + ' #' + i + ' -> #' + k + ' ' + d.from) | ||
redundant[d.from] = true; | ||
@@ -221,4 +237,2 @@ } | ||
var _04Compress = compress; | ||
function reverse(str) { | ||
@@ -283,22 +297,64 @@ return str.split('').reverse().join(''); | ||
var _05Format = format; | ||
var find = function find(pairs) { | ||
pairs = pairs.filter(function (a) { | ||
return a && a[0] && a[1]; | ||
}); // look at all patterns | ||
var thumb = function thumb(pairs) { | ||
// look at all patterns | ||
var suffixes = _01GetAll(pairs); // look for the greatest patterns | ||
var suffixes = getAll(pairs); // look for the greatest patterns | ||
var best = _02FindBest(suffixes); // run pattern against the pairs | ||
var best = findBest(suffixes); // run pattern against the pairs | ||
var rules = _03Rank(best, pairs); // remove duplicates | ||
var rules = rank(best, pairs); // remove duplicates | ||
rules = _04Compress(rules); // nice result format | ||
rules = compress(rules); // nice result format | ||
return _05Format(rules, pairs); | ||
return format(rules, pairs); | ||
}; | ||
var src = thumb; | ||
/**
 * Express `part` as a fraction of `total`, rounded to three decimal places.
 *
 * @param {number} part - the numerator
 * @param {number} total - the denominator
 * @returns {number} `part / total` rounded to 3 decimals, or 0 when
 *   `total` is 0
 */
var percent = function percent(part, total) {
  // an empty input has nothing to cover; avoid a NaN/Infinity result
  if (total === 0) {
    return 0;
  }
  var num = part / total;
  return Math.round(num * 1000) / 1000;
};
return src; | ||
// Reshape a raw result in place: drop the per-rule counts, turn the
// exception pairs into a lookup object, order the rules longest-suffix
// first, and record the coverage score.
var postProcess = function postProcess(res, inputSize) {
  // the third slot of each rule is summed for the coverage score
  var matched = res.rules.reduce(function (sum, rule) {
    return sum + rule[2];
  }, 0);
  // keep only the first two slots of each rule
  res.rules = res.rules.map(function (rule) {
    return rule.slice(0, 2);
  });
  // convert exceptions to an object
  var lookup = {};
  res.exceptions.forEach(function (pair) {
    lookup[pair[0]] = pair[1];
  });
  res.exceptions = lookup;
  // sort rules results - longest first slot comes first
  res.rules.sort(function (left, right) {
    return right[0].length - left[0].length;
  });
  res.coverage = percent(matched, inputSize);
  return res;
};
// Public entry point: run `find` over the pairs, fold its leftovers and
// exceptions into one list, then hand the result to `postProcess`.
var wrapper = function wrapper(pairs) {
  // measured before find() runs, so it reflects the caller's input
  var total = pairs.length;
  var found = find(pairs);
  var model = {
    rules: found.rules || [],
    // leftover pairs come first, then the exceptions as [key, value] pairs
    exceptions: found.remaining.concat(Object.entries(found.exceptions))
  };
  return postProcess(model, total);
};
exports.convert = convert; | ||
exports.find = wrapper; | ||
Object.defineProperty(exports, '__esModule', { value: true }); | ||
}))); | ||
//# sourceMappingURL=suffix-thumb.js.map |
@@ -1,1 +0,1 @@ | ||
!function(r,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(r=r||self).suffixThumb=n()}(this,(function(){"use strict";function r(r,t){return function(r){if(Array.isArray(r))return r}(r)||function(r,n){if("undefined"==typeof Symbol||!(Symbol.iterator in Object(r)))return;var t=[],e=!0,o=!1,f=void 0;try{for(var u,i=r[Symbol.iterator]();!(e=(u=i.next()).done)&&(t.push(u.value),!n||t.length!==n);e=!0);}catch(r){o=!0,f=r}finally{try{e||null==i.return||i.return()}finally{if(o)throw f}}return t}(r,t)||function(r,t){if(!r)return;if("string"==typeof r)return n(r,t);var e=Object.prototype.toString.call(r).slice(8,-1);"Object"===e&&r.constructor&&(e=r.constructor.name);if("Map"===e||"Set"===e)return Array.from(e);if("Arguments"===e||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(e))return n(r,t)}(r,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function n(r,n){(null==n||n>r.length)&&(n=r.length);for(var t=0,e=new Array(n);t<n;t++)e[t]=r[t];return e}var t=function(r){for(var n=[],t=4;t>=0;t-=1)if(!(r.length-1<=t)){var e=r.substr(r.length-t-1,r.length-1);n.push(e)}return n},e=function(n){var e={};return n.forEach((function(n){var o=r(n,2),f=o[0],u=o[1],i=t(f);i.push(""),i.forEach((function(r){e[r]=e[r]||{},t(u).forEach((function(n){e[r][n]=e[r][n]||0,e[r][n]+=1}))}))})),e},o=function(r){var n=[];return Object.keys(r).forEach((function(t){var e,o,f=(e=r[t],o=t,Object.keys(e).map((function(r){return{from:o,to:r,yes:e[r]}})).sort((function(r,n){return r.yes>n.yes?-1:r.yes<n.yes?1:0})));f[0]&&f[0].yes>1&&n.push(f[0])})),n=n.sort((function(r,n){return r.yes>n.yes?-1:r.yes<n.yes?1:0}))},f=function(r,n){var t=function(r,n){return r.map((function(r){var t=0,e=0,o={};return n.forEach((function(n){if(n[0].endsWith(r.from)){var f=new 
RegExp(r.from+"$");n[0].replace(f,r.to)===n[1]?t+=1:(e+=1,o[n[0]]=n[1])}})),{from:r.from,to:r.to,yes:t,no:e,percent:t/(t+e),exceptions:o}}))}(r,n);return t=(t=t.filter((function(r){return r.yes>1&&r.yes>r.no}))).sort((function(r,n){return r.yes>n.yes?-1:r.yes<n.yes?1:0}))},u=function(r){var n={};return r.forEach((function(t,e){r.slice(e+1,r.length).forEach((function(r){r.from.endsWith(t.from)&&(n[r.from]=!0)}))})),r=r.filter((function(r){return!1===n.hasOwnProperty(r.from)}))};function i(r){return r.split("").reverse().join("")}var c=function(r){return(r=r.sort((function(r,n){return r.from.length>n.from.length?-1:r.from.length<n.from.length||(r=i(r.from))>(n=i(n.from))?1:r<n?-1:0}))).map((function(r){return[r.from,r.to,r.yes]}))},a=function(r,n){var t={};r.forEach((function(r){Object.assign(t,r.exceptions)}));var e=n.filter((function(n){return!t.hasOwnProperty(n[0])&&!r.find((function(r){return n[0].endsWith(r.from)}))})),o=(n.length-e.length)/n.length;return{rules:c(r),exceptions:t,coverage:o,remaining:e}};return function(r){var n=e(r),t=o(n),i=f(t,r);return i=u(i),a(i,r)}})); | ||
!function(n,r){"object"==typeof exports&&"undefined"!=typeof module?r(exports):"function"==typeof define&&define.amd?define(["exports"],r):r((n="undefined"!=typeof globalThis?globalThis:n||self).suffixThumb={})}(this,(function(n){"use strict";function r(n,r){return function(n){if(Array.isArray(n))return n}(n)||function(n,r){var e=null==n?null:"undefined"!=typeof Symbol&&n[Symbol.iterator]||n["@@iterator"];if(null==e)return;var t,o,u=[],i=!0,f=!1;try{for(e=e.call(n);!(i=(t=e.next()).done)&&(u.push(t.value),!r||u.length!==r);i=!0);}catch(n){f=!0,o=n}finally{try{i||null==e.return||e.return()}finally{if(f)throw o}}return u}(n,r)||function(n,r){if(!n)return;if("string"==typeof n)return e(n,r);var t=Object.prototype.toString.call(n).slice(8,-1);"Object"===t&&n.constructor&&(t=n.constructor.name);if("Map"===t||"Set"===t)return Array.from(n);if("Arguments"===t||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t))return e(n,r)}(n,r)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function e(n,r){(null==r||r>n.length)&&(r=n.length);for(var e=0,t=new Array(r);e<r;e++)t[e]=n[e];return t}var t=function(){for(var n=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",r=[],e=4;e>=0;e-=1)if(!(n.length-1<=e)){var t=n.substr(n.length-e-1,n.length-1);r.push(t)}return r},o=function(n){var r=[];return Object.keys(n).forEach((function(e){var t,o,u=(t=n[e],o=e,Object.keys(t).map((function(n){return{from:o,to:n,yes:t[n]}})).sort((function(n,r){return n.yes>r.yes?-1:n.yes<r.yes?1:0})));u[0]&&u[0].yes>1&&r.push(u[0])})),r=r.sort((function(n,r){return n.yes>r.yes?-1:n.yes<r.yes?1:0}))},u=function(n,r){var e=function(n,r){return n.map((function(n){var e=0,t=0,o={};return r.forEach((function(r){if(r[0].endsWith(n.from)){var u=new 
RegExp(n.from+"$");r[0].replace(u,n.to)===r[1]?e+=1:(t+=1,o[r[0]]=r[1])}})),{from:n.from,to:n.to,yes:e,no:t,percent:e/(e+t),exceptions:o}}))}(n,r);return e=(e=e.filter((function(n){return n.yes>1&&n.yes>n.no}))).sort((function(n,r){return n.yes>r.yes?-1:n.yes<r.yes?1:0}))};function i(n){return n.split("").reverse().join("")}var f=function(n){return(n=n.sort((function(n,r){return n.from.length>r.from.length?-1:n.from.length<r.from.length||(n=i(n.from))>(r=i(r.from))?1:n<r?-1:0}))).map((function(n){return[n.from,n.to,n.yes]}))},c=function(n){var e,i,c=function(n){var e={};return n.forEach((function(n){var o=r(n,2),u=o[0],i=o[1],f=t(u);f.push(""),f.forEach((function(n){e[n]=e[n]||{},t(i).forEach((function(r){e[n][r]=e[n][r]||0,e[n][r]+=1}))}))})),e}(n=n.filter((function(n){return n&&n[0]&&n[1]}))),s=o(c),a=u(s,n);return i={},(e=a).forEach((function(n,r){e.slice(r+1,e.length).forEach((function(r){r.from.endsWith(n.from)&&(i[r.from]=!0)}))})),function(n,r){var e={};n.forEach((function(n){Object.assign(e,n.exceptions)}));var t=r.filter((function(r){return!e.hasOwnProperty(r[0])&&!n.find((function(n){return r[0].endsWith(n.from)}))})),o=(r.length-t.length)/r.length;return{rules:f(n),exceptions:e,coverage:o,remaining:t}}(a=e=e.filter((function(n){return!1===i.hasOwnProperty(n.from)})),n)},s=function(n,r){var e,t=0;return n.rules=n.rules.map((function(n){return t+=n[2],n.slice(0,2)})),n.exceptions=n.exceptions.reduce((function(n,r){return n[r[0]]=r[1],n}),{}),n.rules=n.rules.sort((function(n,r){return n[0].length>r[0].length?-1:n[0].length<r[0].length?1:0})),n.coverage=(e=t/r,Math.round(1e3*e)/1e3),n};n.convert=function(n,r){if(r.exceptions.hasOwnProperty(n))return r.exceptions[n];for(var e=0;e<r.rules.length;e+=1){var t=r.rules[e][0];if(n.endsWith(t)){var o=new RegExp(t+"$");return n.replace(o,r.rules[e][1])}}return null},n.find=function(n){var r=n.length,e={},t=c(n);return 
e.rules=t.rules||[],e.exceptions=t.remaining.concat(Object.entries(t.exceptions)),e=s(e,r)},Object.defineProperty(n,"__esModule",{value:!0})})); |
{ | ||
"name": "suffix-thumb", | ||
"description": "learn transformations between two sets of words", | ||
"version": "0.1.0", | ||
"version": "1.0.0", | ||
"author": "Spencer Kelly <spencermountain@gmail.com> (http://spencermounta.in)", | ||
"main": "./builds/suffix-thumb.js", | ||
"main": "./builds/suffix-thumb.mjs", | ||
"unpkg": "./builds/suffix-thumb.min.js", | ||
"module": "./builds/suffix-thumb.mjs", | ||
"types": "types/index.d.ts", | ||
"type": "module", | ||
"exports": { | ||
".": { | ||
"import": "./builds/suffix-thumb.mjs", | ||
"require": "./builds/suffix-thumb.js" | ||
}, | ||
"./find": { | ||
"import": "./src/find/index.js" | ||
}, | ||
"./convert": { | ||
"import": "./src/convert/index.js" | ||
} | ||
}, | ||
"repository": { | ||
@@ -15,4 +28,4 @@ "type": "git", | ||
"scripts": { | ||
"test": "tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"testb": "TESTENV=prod tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"test": "tape-es \"./test/**/*.test.js\" | tap-dancer --color always", | ||
"testb": "TESTENV=prod tape-es \"./test/**/*.test.js\" | tap-dancer --color always", | ||
"watch": "amble ./scratch.js", | ||
@@ -35,19 +48,20 @@ "build": "rollup -c --silent" | ||
], | ||
"dependencies": {}, | ||
"devDependencies": { | ||
"@babel/core": "7.9.0", | ||
"@babel/preset-env": "7.9.5", | ||
"@rollup/plugin-commonjs": "11.1.0", | ||
"@rollup/plugin-json": "4.0.3", | ||
"@rollup/plugin-node-resolve": "7.1.3", | ||
"amble": "1.0.0", | ||
"efrt": "2.2.2", | ||
"rollup": "2.6.1", | ||
"@babel/core": "7.14.6", | ||
"@babel/preset-env": "7.14.7", | ||
"@rollup/plugin-commonjs": "19.0.0", | ||
"@rollup/plugin-json": "4.1.0", | ||
"@rollup/plugin-node-resolve": "13.0.0", | ||
"amble": "1.3.0", | ||
"efrt": "2.3.0", | ||
"eslint": "7.30.0", | ||
"rollup": "2.53.0", | ||
"rollup-plugin-babel": "4.4.0", | ||
"rollup-plugin-filesize-check": "0.0.1", | ||
"rollup-plugin-terser": "5.3.0", | ||
"tap-dancer": "0.2.0", | ||
"tape": "4.13.2" | ||
"rollup-plugin-terser": "7.0.2", | ||
"tap-dancer": "0.3.4", | ||
"tape": "5.2.2", | ||
"tape-es": "1.2.15" | ||
}, | ||
"license": "MIT" | ||
} |
@@ -13,3 +13,3 @@ <div align="center"> | ||
<a href="https://unpkg.com/suffix-thumb/builds/suffix-thumb.min.js"> | ||
<img src="https://badge-size.herokuapp.com/spencermountain/compromise/master/plugins/ngrams/builds/suffix-thumb.min.js" /> | ||
<img src="https://badge-size.herokuapp.com/spencermountain/suffix-thumb/master/builds/suffix-thumb.min.js" /> | ||
</a> | ||
@@ -34,3 +34,3 @@ | ||
```js | ||
const thumb = require('suffix-thumb') | ||
import { find, convert } from 'suffix-thumb' | ||
@@ -42,7 +42,6 @@ const pairs = [ | ||
] | ||
let res = thumb(pairs) | ||
let model = find(pairs) | ||
/* { rules: [ ['alk', 'alked'] ], | ||
exceptions: {}, | ||
exceptions: {go:'went'}, | ||
coverage: 0.66, | ||
remaining: [ ['go', 'went'] ] | ||
}*/ | ||
@@ -57,3 +56,3 @@ | ||
] | ||
let res = thumb(pairs) | ||
let model = find(pairs) | ||
/* | ||
@@ -69,2 +68,5 @@ { | ||
*/ | ||
let out = convert('snafoo', model) | ||
// 'snabar' | ||
``` | ||
@@ -84,6 +86,4 @@ | ||
## See also | ||
Ideally, you should be able to take a list of word-pairs, create a model for them, and then delete the 2nd half of the word pairs. | ||
- [nlp-thumb](https://github.com/nlp-compromise/thumb) - a classifier by word-suffix | ||
MIT |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
542
1
Yes
23655
15
6