suffix-thumb
Advanced tools
Comparing version 4.0.2 to 5.0.0
{ | ||
"name": "suffix-thumb", | ||
"description": "learn transformations between two sets of words", | ||
"version": "4.0.2", | ||
"version": "5.0.0", | ||
"author": "Spencer Kelly <spencermountain@gmail.com> (http://spencermounta.in)", | ||
@@ -22,4 +22,5 @@ "main": "./src/index.js", | ||
"scripts": { | ||
"test": "tape \"./test/**/*.test.js\" | tap-dancer --color always", | ||
"testb": "TESTENV=prod tape \"./test/**/*.test.js\" | tap-dancer --color always", | ||
"test": "tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"testb": "TESTENV=prod tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"size": "node scripts/filesizes.js", | ||
"watch": "amble ./scratch.js", | ||
@@ -42,3 +43,6 @@ "build": "rollup -c --silent" | ||
], | ||
"dependencies": {}, | ||
"dependencies": { | ||
"efrt": "^2.7.0", | ||
"json-diff": "^1.0.0" | ||
}, | ||
"devDependencies": { | ||
@@ -45,0 +49,0 @@ "@rollup/plugin-commonjs": "21.0.1", |
@@ -63,2 +63,12 @@ <div align="center"> | ||
``` | ||
You can pass in options: | ||
```js | ||
let opts={ | ||
threshold:80, //how sloppy our initial rules can be | ||
min:0, //rule must satisfy # of pairs | ||
reverse:true, //compute backward transformation, too | ||
} | ||
let model = learn(pairs, opts) | ||
``` | ||
<!-- spacer --> | ||
@@ -141,3 +151,3 @@ <img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
``` | ||
<!-- | ||
### Classify | ||
@@ -160,3 +170,3 @@ the model can also be used to classify whether a given word belongs to either Left or Right sides. | ||
Unlike convert, the classifier is not guaranteed to return 100% on the training data. | ||
The classifier will generally hit high-90s on the given dataset, but how well it generalizes to novel input is up to the dataset. | ||
The classifier will generally hit high-90s on the given dataset, but how well it generalizes to novel input is up to the dataset. --> | ||
@@ -163,0 +173,0 @@ <!-- spacer --> |
@@ -1,59 +0,59 @@ | ||
const prefix = /^.([0-9]+)/ | ||
// handle compressed form of key-value pair | ||
const getKeyVal = function (word, model) { | ||
let val = model.exceptions[word] | ||
let m = val.match(prefix) | ||
if (m === null) { | ||
// return not compressed form | ||
return model.exceptions[word] | ||
// 01- full-word exceptions | ||
const checkEx = function (str, ex = {}) { | ||
if (ex.hasOwnProperty(str)) { | ||
return ex[str] | ||
} | ||
// uncompress it | ||
let num = Number(m[1]) || 0 | ||
let pre = word.substr(0, num) | ||
return pre + val.replace(prefix, '') | ||
return null | ||
} | ||
// get suffix-rules according to last char of word | ||
const getRules = function (word, rules = {}) { | ||
let char = word[word.length - 1] | ||
let list = rules[char] || [] | ||
// do we have a generic suffix? | ||
if (rules['']) { | ||
list = list.concat(rules['']) | ||
// 02- suffixes that pass our word through | ||
const checkSame = function (str, same = []) { | ||
for (let i = 0; i < same.length; i += 1) { | ||
if (str.endsWith(same[i])) { | ||
return str | ||
} | ||
} | ||
return list | ||
return null | ||
} | ||
const convert = function (word, model, debug) { | ||
// check list of irregulars | ||
if (model.exceptions.hasOwnProperty(word)) { | ||
if (debug) { | ||
console.log("exception, ", word, model.exceptions[word]) | ||
// 03- check rules - longest first | ||
const checkRules = function (str, fwd, both = {}) { | ||
fwd = fwd || {} | ||
let max = str.length - 1 | ||
// look for a matching suffix | ||
for (let i = max; i >= 1; i -= 1) { | ||
let size = str.length - i | ||
let suff = str.substring(size, str.length) | ||
// check fwd rules, first | ||
if (fwd.hasOwnProperty(suff) === true) { | ||
return str.slice(0, size) + fwd[suff] | ||
} | ||
return getKeyVal(word, model) | ||
// check shared rules | ||
if (both.hasOwnProperty(suff) === true) { | ||
return str.slice(0, size) + both[suff] | ||
} | ||
} | ||
// if model is reversed, try rev rules | ||
let rules = model.rules | ||
if (model.reversed) { | ||
rules = model.rev | ||
// try a fallback transform | ||
if (fwd.hasOwnProperty('')) { | ||
return str += fwd[''] | ||
} | ||
// try suffix rules | ||
rules = getRules(word, rules) | ||
for (let i = 0; i < rules.length; i += 1) { | ||
let suffix = rules[i][0] | ||
if (word.endsWith(suffix)) { | ||
if (debug) { | ||
console.log("rule, ", rules[i]) | ||
} | ||
let reg = new RegExp(suffix + '$') | ||
return word.replace(reg, rules[i][1]) | ||
} | ||
if (both.hasOwnProperty('')) { | ||
return str += both[''] | ||
} | ||
if (debug) { | ||
console.log(' x - ' + word) | ||
} | ||
// return the original word unchanged | ||
return word | ||
return null | ||
} | ||
export default convert | ||
/**
 * Transform a word with a learned model, trying each strategy in order and
 * returning the first one that produces a result.
 *
 * Order: full-word exceptions (model.ex), pass-through suffixes (model.same),
 * then suffix rules (model.fwd, model.both).
 *
 * @param {string} [str=''] word to convert
 * @param {object} [model={}] model object; model.ex, model.same, model.fwd and model.both are read
 * @returns {string} the converted word, or `str` unchanged when nothing matched
 */
const convert = function (str = '', model = {}) {
  // the helpers signal "no match" with null; compare against that explicitly,
  // because an empty string is a legitimate result (e.g. an exception mapping to '')
  // 01- check exceptions
  let out = checkEx(str, model.ex)
  // 02- check same
  if (out === null || out === undefined) {
    out = checkSame(str, model.same)
  }
  // 03- check forward and both rules
  if (out === null || out === undefined) {
    out = checkRules(str, model.fwd, model.both)
  }
  // return unchanged
  if (out === null || out === undefined) {
    return str
  }
  return out
}
export default convert |
import convert from './convert/index.js' | ||
import compress from './compress/index.js' | ||
import uncompress from './compress/uncompress.js' | ||
// import compress from './compress/index.js' | ||
// import uncompress from './compress/uncompress.js' | ||
import reverse from './reverse/index.js' | ||
import test from './test/index.js' | ||
import learn from './learn/index.js' | ||
// import test from './test/index.js' | ||
// import old from './learn/index.js' | ||
import validate from './validate/index.js' | ||
import classify from './classify/index.js' | ||
// import classify from './classify/index.js' | ||
// import fingerprint from './fingerprint/index.js' | ||
export { learn, convert, compress, uncompress, reverse, validate, test, classify } | ||
import learn from './learn/index.js' | ||
import compress from './pack/pack.js' | ||
import uncompress from './pack/unpack.js' | ||
import test from './test/index.js' | ||
// import again from './again/index.js' | ||
export { learn, convert, compress, uncompress, reverse, validate, test } |
@@ -1,25 +0,46 @@ | ||
import learn from './learn.js' | ||
import { indexRules } from '../_lib.js' | ||
import prepare from './00-prep.js' | ||
import findRules from './01-findRules.js' | ||
import shareBackward from './02-share-back.js' | ||
const mergeExceptions = function (fwd, bkwd) { | ||
Object.entries(bkwd).forEach(b => { | ||
fwd[b[1]] = b[0] //reverse | ||
}) | ||
return fwd | ||
const defaults = { | ||
threshold: 80, | ||
min: 0 | ||
} | ||
const swap = (a) => [a[1], a[0]] | ||
const learnBoth = function (pairs, opts = {}) { | ||
let fwd = learn(pairs, opts) | ||
// learn backward too? | ||
const learn = function (pairs, opts = {}) { | ||
opts = Object.assign({}, defaults, opts) | ||
let ex = {} | ||
let rev = {} | ||
pairs = prepare(pairs, ex) | ||
// get forward-dir rules | ||
let { rules, pending, finished } = findRules(pairs, [], opts) | ||
// move some to both | ||
let { fwd, both, revPairs } = shareBackward(rules, pairs.map(swap), opts) | ||
// generate remaining reverse-dir rules | ||
let pendingBkwd = [] | ||
if (opts.reverse !== false) { | ||
pairs = pairs.map(a => [a[1], a[0]]) | ||
let bkwd = learn(pairs, opts) | ||
// merge exceptions | ||
fwd.exceptions = mergeExceptions(fwd.exceptions, bkwd.exceptions) | ||
// add rules | ||
fwd.rev = indexRules(bkwd.rules) | ||
// console.log(revPairs.pending) | ||
let bkwd = findRules(revPairs.pending, revPairs.finished, opts) | ||
pendingBkwd = bkwd.pending | ||
rev = bkwd.rules | ||
} | ||
fwd.rules = indexRules(fwd.rules) | ||
return fwd | ||
// console.log(pending.length, 'pending fwd') | ||
// console.log(pendingBkwd.length, 'pending Bkwd') | ||
// add anything remaining as an exception | ||
if (opts.min <= 1) { | ||
pending.forEach(arr => { | ||
ex[arr[0]] = arr[1] | ||
}) | ||
pendingBkwd.forEach(arr => { | ||
ex[arr[1]] = arr[0] | ||
}) | ||
} | ||
return { | ||
fwd, | ||
both, | ||
rev, | ||
ex, | ||
} | ||
} | ||
export default learnBoth | ||
export default learn |
@@ -1,2 +0,2 @@ | ||
const reverseObj = function (obj) { | ||
const flipObj = function (obj) { | ||
return Object.entries(obj).reduce((h, a) => { | ||
@@ -8,12 +8,12 @@ h[a[1]] = a[0] | ||
const reverse = function (model) { | ||
let { rules, exceptions, rev } = model | ||
exceptions = reverseObj(exceptions) | ||
const reverse = function (model = {}) { | ||
return { | ||
reversed: !Boolean(model.reversed),//toggle this | ||
rules, | ||
exceptions, | ||
rev | ||
reversed: true, | ||
// keep these two | ||
both: flipObj(model.both), | ||
ex: flipObj(model.ex), | ||
// swap this one in | ||
fwd: model.rev || {} | ||
} | ||
} | ||
export default reverse |
@@ -1,87 +0,35 @@ | ||
import { learn, convert, compress, uncompress, reverse } from '../index.js' | ||
import filesize from './filesize.js' | ||
import testSide from './classify.js' | ||
import { convert, reverse, validate } from '../index.js' | ||
const green = str => '\x1b[32m' + str + '\x1b[0m' | ||
const red = str => '\x1b[31m' + str + '\x1b[0m' | ||
const cyan = str => '\x1b[36m' + str + '\x1b[0m' | ||
const blue = str => '\x1b[34m' + str + '\x1b[0m' | ||
const yellow = str => '\x1b[33m' + str + '\x1b[0m' | ||
const dim = str => '\x1b[2m' + str + '\x1b[0m' | ||
const testFwd = function (pairs, model) { | ||
let wrong = 0 | ||
pairs.forEach((a) => { | ||
let created = convert(a[0], model) | ||
if (created !== a[1]) { | ||
wrong += 1 | ||
console.log(red('error:'), yellow(a[0] + ' →' + created)) | ||
} | ||
}) | ||
if (wrong === 0) { | ||
console.log(green(` ✓ forward`)) | ||
} else { | ||
console.log(red(` ✗ ${wrong} `) + 'errors\n') | ||
} | ||
return wrong | ||
} | ||
/**
 * Format a ratio as a percentage string, rounded to one decimal place.
 *
 * @param {number} part numerator
 * @param {number} total denominator
 * @returns {string} e.g. '33.3%'; '0%' when total is 0
 */
const percent = (part, total) => {
  // an empty list would otherwise divide by zero and print 'NaN%'
  if (total === 0) {
    return '0%'
  }
  let num = (part / total) * 100
  num = Math.round(num * 10) / 10
  return num + '%'
}
const testBack = function (pairs, model) { | ||
let wrong = 0 | ||
let rev = reverse(model) | ||
pairs.forEach((a) => { | ||
let created = convert(a[1], rev) | ||
if (created !== a[0]) { | ||
wrong += 1 | ||
console.log(red(' rev ✗: '), yellow(a[1] + ' → ' + created)) | ||
const swap = (a) => [a[1], a[0]] | ||
const getNum = function (pairs, model) { | ||
let right = 0 | ||
let wrong = [] | ||
pairs.forEach(a => { | ||
let have = convert(a[0], model) | ||
if (have === a[1]) { | ||
right += 1 | ||
} else { | ||
console.log('❌ ', a, '→ ' + have) | ||
wrong.push(a) | ||
} | ||
}) | ||
if (wrong === 0) { | ||
console.log(green(` ✓ backward`)) | ||
} else { | ||
console.log(red(` ✗ ${wrong} `) + 'errors reversed\n') | ||
} | ||
return wrong | ||
return percent(right, pairs.length) | ||
} | ||
const testSize = function (pairs, model) { | ||
let before = filesize(pairs) | ||
let smol = compress(model) | ||
let after = filesize(smol) | ||
console.log(` ${dim(before)} -> ${blue(after)}`) | ||
/**
 * Print how well a model reproduces the given pairs, forward and backward.
 *
 * Pairs are passed through validate() first. The forward score runs the model
 * on each pair as given; the backward score runs reverse(model) on the
 * swapped pairs. Both scores are logged on one line; nothing is returned.
 *
 * @param {Array<[string, string]>} pairs word pairs to check
 * @param {object} [model={}] model to evaluate
 */
const test = function (pairs, model = {}) {
  const cleaned = validate(pairs)
  const forward = getNum(cleaned, model)
  const flipped = cleaned.map(swap)
  const backward = getNum(flipped, reverse(model))
  console.log(`${blue(forward)} - 🔄 ${cyan(backward)}`)
}
const stats = function (model) { | ||
let rules = 0 | ||
Object.keys(model.rules).forEach(k => rules += model.rules[k].length) | ||
let rev = 0 | ||
Object.keys(model.rev || {}).forEach(k => rev += model.rev[k].length) | ||
let exc = Object.keys(model.exceptions).length | ||
console.log(` ${blue(rules.toLocaleString())} rules, ${yellow(rev.toLocaleString())} reversed, ${blue(exc.toLocaleString())} exceptions`) | ||
} | ||
const test = function (pairs, opts) { | ||
console.log('\n') | ||
console.log(yellow(pairs.length.toLocaleString()) + ` pairs - ${dim(filesize(pairs))}`) | ||
let begin = new Date() | ||
let model = learn(pairs, opts) | ||
let end = new Date() | ||
console.log(' ', (end.getTime() - begin.getTime()) / 1000, 'seconds') | ||
console.log(yellow('\nSize:')) | ||
stats(model) | ||
testSize(pairs, model) | ||
model = compress(model) | ||
model = uncompress(model) | ||
console.log(yellow('\nForward:')) | ||
testFwd(pairs, model) | ||
console.log(yellow('\nBackward:')) | ||
testBack(pairs, model) | ||
// hmm | ||
// console.log(yellow('\nClassify:')) | ||
// testSide(pairs, model, 'Left') | ||
// testSide(pairs, model, 'Right') | ||
} | ||
export default test |
// make sure inputs are not impossible to square-up | ||
const validate = function (pairs, opts = {}) { | ||
let left = {} | ||
let right = {} | ||
let left = new Set() | ||
let right = new Set() | ||
pairs = pairs.filter(a => { | ||
if (left[a[0]] !== undefined) { | ||
if (opts.debug) { | ||
console.warn('Duplicate left side:') | ||
console.log(' 1.', [a[0], left[a[0]]]) | ||
console.log(' 2.', a) | ||
} | ||
if (left.has(a[0])) { | ||
// console.log('dupe', a) | ||
return false | ||
} | ||
if (right[a[1]] !== undefined) { | ||
if (opts.debug) { | ||
console.warn('Duplicate right side:') | ||
console.log(' 1.', [right[a[1]], a[1]]) | ||
console.log(' 2.', a) | ||
} | ||
if (opts.inverse === false) { | ||
return true //allow it | ||
} | ||
if (right.has(a[1])) { | ||
// console.log('dupe', a) | ||
return false | ||
} | ||
left[a[0]] = a[1] | ||
right[a[1]] = a[0] | ||
left.add(a[0]) | ||
right.add(a[1]) | ||
// ensure pairs are aligned by prefix | ||
// if (a[0].substring(0, 1) !== a[1].substring(0, 1)) { | ||
// console.log('pair not aligned at prefix:', a) | ||
// return false | ||
// } | ||
return true | ||
@@ -28,0 +23,0 @@ }) |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
201
52796
2
23
1581
+ Added efrt@^2.7.0
+ Added json-diff@^1.0.0
+ Added @ewoudenberg/difflib@0.1.0 (transitive)
+ Added colors@1.4.0 (transitive)
+ Added dreamopt@0.8.0 (transitive)
+ Added efrt@2.7.0 (transitive)
+ Added heap@0.2.7 (transitive)
+ Added json-diff@1.0.6 (transitive)
+ Added wordwrap@1.0.0 (transitive)