words-to-numbers
Advanced tools
Comparing version 1.2.1 to 1.2.2
@@ -7,2 +7,6 @@ 'use strict'; | ||
var _assign = require('babel-runtime/core-js/object/assign'); | ||
var _assign2 = _interopRequireDefault(_assign); | ||
var _keys = require('babel-runtime/core-js/object/keys'); | ||
@@ -12,6 +16,2 @@ | ||
var _assign = require('babel-runtime/core-js/object/assign'); | ||
var _assign2 = _interopRequireDefault(_assign); | ||
exports.wordsToNumbers = wordsToNumbers; | ||
@@ -27,50 +27,56 @@ | ||
var _ohmJs = require('ohm-js'); | ||
var _ohmJs2 = _interopRequireDefault(_ohmJs); | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } | ||
// inspired by this answer on stackoverflow: http://stackoverflow.com/a/12014376 by http://stackoverflow.com/users/631193/javaandcsharp and thanks to @Greg Hewgill for the original, written in Python. | ||
var PRIMARY_COUNT = { | ||
var UNIT = { | ||
zero: 0, | ||
a: 1, | ||
first: 1, | ||
one: 1, | ||
first: 1, | ||
second: 2, | ||
two: 2, | ||
second: 2, | ||
third: 3, | ||
thirteenth: 13, | ||
thirteen: 13, | ||
three: 3, | ||
third: 3, | ||
fourth: 4, | ||
fourteenth: 14, | ||
fourteen: 14, | ||
four: 4, | ||
fourth: 4, | ||
fifteenth: 15, | ||
fifteen: 15, | ||
fifth: 5, | ||
five: 5, | ||
fifth: 5, | ||
sixth: 6, | ||
sixteenth: 16, | ||
sixteen: 16, | ||
six: 6, | ||
sixth: 6, | ||
seventeenth: 17, | ||
seventeen: 17, | ||
seventh: 7, | ||
seven: 7, | ||
seventh: 7, | ||
eighteenth: 18, | ||
eighteen: 18, | ||
eighth: 8, | ||
eight: 8, | ||
eighth: 8, | ||
nineteenth: 19, | ||
nineteen: 19, | ||
ninth: 9, | ||
nine: 9, | ||
ninth: 9, | ||
tenth: 10, | ||
ten: 10, | ||
tenth: 10, | ||
eleventh: 11, | ||
eleven: 11, | ||
eleventh: 11, | ||
twelfth: 12, | ||
twelve: 12, | ||
twelfth: 12, | ||
thirteen: 13, | ||
thirteenth: 13, | ||
fourteen: 14, | ||
fourteenth: 14, | ||
fifteen: 15, | ||
fifteenth: 15, | ||
sixteen: 16, | ||
sixteenth: 16, | ||
seventeen: 17, | ||
seventeenth: 17, | ||
eighteen: 18, | ||
eighteenth: 18, | ||
nineteen: 19, | ||
nineteenth: 19 | ||
}; | ||
a: 1 | ||
}; /* Node module for converting words to numerals. | ||
Convert words to numbers. Optionally fuzzy match the words to numbers. | ||
`npm install words-to-numbers` | ||
If the whole string passed is a number then it will return a Number type otherwise it will return the original string with all instances of numbers replaced. | ||
*/ | ||
var SECONDARY_COUNT = { | ||
var TEN = { | ||
twenty: 20, | ||
@@ -94,4 +100,2 @@ twentieth: 20, | ||
var COUNT = (0, _assign2.default)({}, PRIMARY_COUNT, SECONDARY_COUNT); | ||
var MAGNITUDE = { | ||
@@ -113,79 +117,61 @@ hundred: 100, | ||
var NUMBER_WORDS = (0, _keys2.default)(COUNT).concat((0, _keys2.default)(MAGNITUDE)).concat(['and', 'point']); | ||
// all words found in number phrases | ||
var NUMBER_WORDS = ['and', 'point'].concat((0, _keys2.default)(UNIT)).concat((0, _keys2.default)(TEN)).concat((0, _keys2.default)(MAGNITUDE)); | ||
var clean = function clean(word) { | ||
return word.replace(',', ''); | ||
}; | ||
var PUNCTUATION = /[.,\/#!$%\^&\*;:{}=\-_`~()]/g; | ||
var extractNumberRegions = function extractNumberRegions(words) { | ||
var numWords = words.length; | ||
var grammar = _ohmJs2.default.grammar('\n WordsToNumbers {\n Number = Section* "point"? unit*\n Section = TenAndMagnitude | UnitAndMagnitude | TenUnitAndMagnitude | Unit | Ten | TenAndUnit | Magnitude\n Ten = ten ~unit ~magnitude\n TenAndUnit = ten unit ~magnitude\n TenAndMagnitude = ten ~unit magnitude\n UnitAndMagnitude = ~ten unit magnitude\n TenUnitAndMagnitude = ten unit magnitude\n Unit = ~ten unit ~magnitude\n Magnitude = ~ten ~unit magnitude\n ten = ' + (0, _keys2.default)(TEN).map(function (key) { | ||
return '"' + key + '" | '; | ||
}).join('').slice(0, -2) + '\n unit = ' + (0, _keys2.default)(UNIT).map(function (key) { | ||
return '"' + key + '" | '; | ||
}).join('').slice(0, -2) + '\n magnitude = ' + (0, _keys2.default)(MAGNITUDE).map(function (key) { | ||
return '"' + key + '" | '; | ||
}).join('').slice(0, -2) + '\n }\n'); | ||
var numberWords = words.map(function (word) { | ||
return NUMBER_WORDS.includes(clean(word)); | ||
}); | ||
var pointReached = false; | ||
var reduced = numberWords.reduce(function (acc, isNumberWord, i) { | ||
if (isNumberWord) { | ||
if (words[i] === 'point') pointReached = true; | ||
if (!(0, _itsSet2.default)(acc.start)) { | ||
acc.start = i; | ||
} else if ((0, _keys2.default)(PRIMARY_COUNT).includes(words[i - 1]) && (0, _keys2.default)(PRIMARY_COUNT).includes(words[i]) && !pointReached) { | ||
acc.regions.push({ start: acc.start, end: i - 1 }); | ||
if (i === numWords - 1) { | ||
acc.regions.push({ start: i, end: i }); | ||
} else { | ||
acc.start = i; | ||
} | ||
} | ||
} else if ((0, _itsSet2.default)(acc.start)) { | ||
acc.regions.push({ start: acc.start, end: i - 1 }); | ||
acc.start = null; | ||
var semantics = grammar.createSemantics().addOperation('eval', { | ||
Number: function Number(sections, point, decimal) { | ||
var ints = sections.children.reduce(function (sum, child) { | ||
return sum + child.eval(); | ||
}, 0); | ||
if (point.children.length) { | ||
var decimals = decimal.children.reduce(function (acc, d) { | ||
return '' + acc + d.eval(); | ||
}, '').replace(/\s/g, ''); | ||
return parseFloat(ints + '.' + decimals); | ||
} | ||
return acc; | ||
}, { regions: [], start: null }); | ||
return reduced.start === 0 && !reduced.regions.length ? 'whole' : reduced.regions; | ||
}; | ||
var convertWordsToDecimal = function convertWordsToDecimal(words) { | ||
return words.map(function (word) { | ||
return COUNT[word]; | ||
}).join(''); | ||
}; | ||
var convertWordsToNonDecimal = function convertWordsToNonDecimal(words) { | ||
var reduced = words.reduce(function (acc, word) { | ||
var cleanWord = clean(word); | ||
if (cleanWord === 'and') return acc; | ||
if ((0, _itsSet2.default)(acc.count)) { | ||
if ((0, _itsSet2.default)(COUNT[cleanWord])) { | ||
acc.extra += COUNT[acc.count]; | ||
acc.count = cleanWord; | ||
} else { | ||
acc.pairs.push({ count: acc.count, magnitude: cleanWord }); | ||
acc.count = null; | ||
} | ||
} else { | ||
acc.count = cleanWord; | ||
} | ||
return acc; | ||
}, { pairs: [], count: null, extra: 0 }); | ||
return reduced.pairs.reduce(function (acc, pair) { | ||
return acc + COUNT[pair.count] * MAGNITUDE[pair.magnitude]; | ||
}, COUNT[reduced.count] || 0) + reduced.extra; | ||
}; | ||
var convertWordsToNumber = function convertWordsToNumber(words) { | ||
var pointIndex = words.indexOf('point'); | ||
if (pointIndex > -1) { | ||
var numberWords = words.slice(0, pointIndex); | ||
var decimalWords = words.slice(pointIndex + 1); | ||
return parseFloat(convertWordsToNonDecimal(numberWords) + '.' + convertWordsToDecimal(decimalWords)); | ||
return ints; | ||
}, | ||
Ten: function Ten(ten) { | ||
return ten.eval(); | ||
}, | ||
Unit: function Unit(unit) { | ||
return unit.eval(); | ||
}, | ||
TenAndUnit: function TenAndUnit(ten, unit) { | ||
return ten.eval() + unit.eval(); | ||
}, | ||
TenAndMagnitude: function TenAndMagnitude(ten, magnitude) { | ||
return ten.eval() * magnitude.eval(); | ||
}, | ||
UnitAndMagnitude: function UnitAndMagnitude(unit, magnitude) { | ||
return unit.eval() * magnitude.eval(); | ||
}, | ||
TenUnitAndMagnitude: function TenUnitAndMagnitude(ten, unit, magnitude) { | ||
return (ten.eval() + unit.eval()) * magnitude.eval(); | ||
}, | ||
Magnitude: function Magnitude(magnitude) { | ||
return magnitude.eval(); | ||
}, | ||
unit: function unit(value) { | ||
return UNIT[value.primitiveValue]; | ||
}, | ||
ten: function ten(value) { | ||
return TEN[value.primitiveValue]; | ||
}, | ||
magnitude: function magnitude(value) { | ||
return MAGNITUDE[value.primitiveValue]; | ||
} | ||
return convertWordsToNonDecimal(words); | ||
}; | ||
}); | ||
// try to coerce a word into a NUMBER_WORD using fuzzy matching | ||
var fuzzyMatch = function fuzzyMatch(word) { | ||
@@ -202,25 +188,93 @@ return NUMBER_WORDS.map(function (numberWord) { | ||
function wordsToNumbers(text, options) { | ||
var opts = (0, _assign2.default)({ fuzzy: false }, options); | ||
var words = text.toString().split(/[\s-]+/); | ||
if (opts.fuzzy) words = words.map(function (word) { | ||
return fuzzyMatch(word); | ||
// Membership predicates for the three word classes (UNIT, TEN, MAGNITUDE).
// Each returns true only when `word` is a string that is an own key of the
// corresponding lookup table. A direct own-property test is used instead of
// building the key array and scanning it on every call.

/**
 * @param {string} word - a single token
 * @returns {boolean} true when `word` is a key of UNIT
 */
var isUnit = function isUnit(word) {
  return typeof word === 'string' && Object.prototype.hasOwnProperty.call(UNIT, word);
};

/**
 * @param {string} word - a single token
 * @returns {boolean} true when `word` is a key of TEN
 */
var isTen = function isTen(word) {
  return typeof word === 'string' && Object.prototype.hasOwnProperty.call(TEN, word);
};

/**
 * @param {string} word - a single token
 * @returns {boolean} true when `word` is a key of MAGNITUDE
 */
var isMag = function isMag(word) {
  return typeof word === 'string' && Object.prototype.hasOwnProperty.call(MAGNITUDE, word);
};
var findRegions = function findRegions(text, fuzzy) { | ||
var words = text.split(/[ -]/g).map(function (word) { | ||
return fuzzy ? fuzzyMatch(word) : word; | ||
}).reduce(function (acc, word, i) { | ||
var start = acc.length ? acc[i - 1].end + 1 : 0; | ||
return acc.concat({ | ||
text: word, | ||
start: start, | ||
end: start + word.length | ||
}); | ||
}, []).map(function (word) { | ||
return (0, _assign2.default)({}, word, { | ||
isNumberWord: NUMBER_WORDS.indexOf(word.text.replace(PUNCTUATION, '').toLowerCase()) !== -1 | ||
}); | ||
}); | ||
var regions = extractNumberRegions(words); | ||
if (regions === 'whole') return convertWordsToNumber(words); | ||
if (!regions.length) return null; | ||
return words.reduce(function (regions, word, index) { | ||
if (!word.isNumberWord) return regions; | ||
if (!regions.length) return [word]; | ||
if (word.text === 'point') { | ||
var newRegions = regions.slice(); | ||
newRegions[regions.length - 1].pointReached = true; | ||
newRegions[regions.length - 1].end = word.end; | ||
newRegions[regions.length - 1].text += ' ' + word.text; | ||
return newRegions; | ||
} | ||
var prevRegion = regions[regions.length - 1]; | ||
var prevWord = words[index - 1] || ''; | ||
if (prevRegion.end === word.start - 1 && !(isUnit(word.text) && isUnit(prevWord.text) || prevRegion.pointReached) && !(isTen(word.text) && isTen(prevWord.text)) && !(isMag(word.text) && isMag(prevWord.text)) && !(isTen(word.text) && isUnit(prevWord.text)) || prevRegion.pointReached && isUnit(word.text) || word === 'and' || prevWord === 'and') { | ||
var _newRegions = regions.slice(); | ||
_newRegions[regions.length - 1].end = word.end; | ||
_newRegions[regions.length - 1].text += ' ' + word.text; | ||
return _newRegions; | ||
} | ||
return regions.concat(word); | ||
}, []); | ||
}; | ||
var removedWordsCount = 0; | ||
/**
 * Evaluate the text of one number region.
 *
 * @param {string} text - the words of a single region, separated by spaces
 * @returns {number|string} the value produced by the grammar's `eval`
 *   operation when the text parses; otherwise the original `text`. A region
 *   consisting only of the helper word "a" or "and" is returned unchanged.
 */
var evaluateNumberRegion = function evaluateNumberRegion(text) {
  // Number words are detected case-insensitively, but the grammar's literals
  // are lowercase, so normalise the case before comparing or matching.
  var normalized = text.toLowerCase();
  if (normalized === 'a' || normalized === 'and') return text;

  // Punctuation becomes whitespace and "and" is dropped before parsing.
  var m = grammar.match(normalized.replace(PUNCTUATION, ' ').replace(/\band\b/g, ''));
  if (m.succeeded()) {
    return semantics(m).eval();
  }
  // Parse failure: report why and leave the region text untouched.
  console.log(m.message);
  return text;
};
/**
 * String counterpart of an array splice: remove `count` characters starting
 * at `index` and insert `add` in their place.
 *
 * @param {string} str - the string to edit
 * @param {number} index - start position; a negative value counts back from
 *   the end of `str` and is clamped to 0 if it reaches past the beginning
 * @param {number} count - number of characters to remove
 * @param {string} [add] - text to insert; a falsy value inserts nothing
 * @returns {string} the edited string
 */
function splice(str, index, count, add) {
  var offset = index;
  if (offset < 0) {
    var fromEnd = str.length + offset;
    offset = fromEnd < 0 ? 0 : fromEnd;
  }
  var head = str.slice(0, offset);
  var tail = str.slice(offset + count);
  return head + (add || '') + tail;
}
// replace all number words in a string with actual numerals. | ||
// If string contains multiple separate numbers then replace each one individually. | ||
// If option `fuzzy` = true then try to coerce words into number words before converting them to numbers. | ||
function wordsToNumbers(text, options) { | ||
var opts = (0, _assign2.default)({ fuzzy: false }, options); | ||
var regions = findRegions(text, opts.fuzzy); | ||
if (!regions.length) return text; | ||
if (regions.length === 1 && regions[0].start === 0 && regions[0].end === regions[0].text.length) { | ||
return evaluateNumberRegion(regions[0].text); | ||
} | ||
return regions.map(function (region) { | ||
return convertWordsToNumber(words.slice(region.start, region.end + 1)); | ||
}).reduce(function (acc, replacedRegion, i) { | ||
var removeCount = regions[i].end - regions[i].start + 1; | ||
var result = acc.slice(0); | ||
result.splice(regions[i].start - removedWordsCount, removeCount, replacedRegion); | ||
removedWordsCount += removeCount - 1; | ||
return result; | ||
}, words).join(' '); | ||
return evaluateNumberRegion(region.text); | ||
}).reverse().reduce(function (acc, number, index) { | ||
var region = regions[regions.length - index - 1]; | ||
return splice(acc, region.start, region.end - region.start, '' + number); | ||
}, text); | ||
} | ||
exports.default = wordsToNumbers; |
{ | ||
"name": "words-to-numbers", | ||
"description": "convert textual words to numbers with optional fuzzy text matching", | ||
"version": "1.2.1", | ||
"version": "1.2.2", | ||
"license": "MIT", | ||
@@ -6,0 +6,0 @@ "options": { |
@@ -30,2 +30,6 @@ /* eslint-disable no-unused-expressions */ | ||
it('sixty nine', () => { | ||
expect(wtn('sixty nine')).to.equal(69); | ||
}); | ||
it('twenty thousand five hundred and sixty nine', () => { | ||
@@ -166,1 +170,41 @@ expect(wtn('twenty thousand five hundred and sixty nine')).to.equal(20569); | ||
}); | ||
it('digit one', () => { | ||
expect(wtn('digit one')).to.equal('digit 1'); | ||
}); | ||
it('digit one ', () => { | ||
expect(wtn('digit one ')).to.equal('digit 1 '); | ||
}); | ||
it('one thirty', () => { | ||
expect(wtn('one thirty')).to.equal('1 30'); | ||
}); | ||
it('thousand', () => { | ||
expect(wtn('thousand')).to.equal(1000); | ||
}); | ||
it('million', () => { | ||
expect(wtn('million')).to.equal(1000000); | ||
}); | ||
it('billion', () => { | ||
expect(wtn('billion')).to.equal(1000000000); | ||
}); | ||
it('xxxxxxx one hundred', () => { | ||
expect(wtn('xxxxxxx one hundred')).to.equal('xxxxxxx 100'); | ||
}); | ||
it('and', () => { | ||
expect(wtn('and')).to.equal('and'); | ||
}); | ||
it('a', () => { | ||
expect(wtn('a')).to.equal('a'); | ||
}); | ||
it('junkvalue', () => { | ||
expect(wtn('junkvalue')).to.equal('junkvalue'); | ||
}); |
312
src/index.js
@@ -1,50 +0,55 @@ | ||
// inspired by this answer on stackoverflow: http://stackoverflow.com/a/12014376 by http://stackoverflow.com/users/631193/javaandcsharp and thanks to @Greg Hewgill for the original, written in Python. | ||
/* Node module for converting words to numerals. | ||
Convert words to numbers. Optionally fuzzy match the words to numbers. | ||
`npm install words-to-numbers` | ||
If the whole string passed is a number then it will return a Number type otherwise it will return the original string with all instances of numbers replaced. | ||
*/ | ||
import itsSet from 'its-set'; | ||
import clj_fuzzy from 'clj-fuzzy'; | ||
import ohm from 'ohm-js'; | ||
const PRIMARY_COUNT = { | ||
const UNIT = { | ||
zero: 0, | ||
a: 1, | ||
first: 1, | ||
one: 1, | ||
first: 1, | ||
second: 2, | ||
two: 2, | ||
second: 2, | ||
third: 3, | ||
thirteenth: 13, | ||
thirteen: 13, | ||
three: 3, | ||
third: 3, | ||
fourth: 4, | ||
fourteenth: 14, | ||
fourteen: 14, | ||
four: 4, | ||
fourth: 4, | ||
fifteenth: 15, | ||
fifteen: 15, | ||
fifth: 5, | ||
five: 5, | ||
fifth: 5, | ||
sixth: 6, | ||
sixteenth: 16, | ||
sixteen: 16, | ||
six: 6, | ||
sixth: 6, | ||
seventeenth: 17, | ||
seventeen: 17, | ||
seventh: 7, | ||
seven: 7, | ||
seventh: 7, | ||
eighteenth: 18, | ||
eighteen: 18, | ||
eighth: 8, | ||
eight: 8, | ||
eighth: 8, | ||
nineteenth: 19, | ||
nineteen: 19, | ||
ninth: 9, | ||
nine: 9, | ||
ninth: 9, | ||
tenth: 10, | ||
ten: 10, | ||
tenth: 10, | ||
eleventh: 11, | ||
eleven: 11, | ||
eleventh: 11, | ||
twelfth: 12, | ||
twelve: 12, | ||
twelfth: 12, | ||
thirteen: 13, | ||
thirteenth: 13, | ||
fourteen: 14, | ||
fourteenth: 14, | ||
fifteen: 15, | ||
fifteenth: 15, | ||
sixteen: 16, | ||
sixteenth: 16, | ||
seventeen: 17, | ||
seventeenth: 17, | ||
eighteen: 18, | ||
eighteenth: 18, | ||
nineteen: 19, | ||
nineteenth: 19, | ||
a: 1, | ||
}; | ||
const SECONDARY_COUNT = { | ||
const TEN = { | ||
twenty: 20, | ||
@@ -68,4 +73,2 @@ twentieth: 20, | ||
const COUNT = Object.assign({}, PRIMARY_COUNT, SECONDARY_COUNT); | ||
const MAGNITUDE = { | ||
@@ -87,86 +90,54 @@ hundred: 100, | ||
const NUMBER_WORDS = Object.keys(COUNT) | ||
.concat(Object.keys(MAGNITUDE)) | ||
.concat(['and', 'point']); | ||
// all words found in number phrases | ||
const NUMBER_WORDS = ['and', 'point'] | ||
.concat(Object.keys(UNIT)) | ||
.concat(Object.keys(TEN)) | ||
.concat(Object.keys(MAGNITUDE)); | ||
const clean = word => word.replace(',', ''); | ||
const PUNCTUATION = /[.,\/#!$%\^&\*;:{}=\-_`~()]/g; | ||
const extractNumberRegions = words => { | ||
const numWords = words.length; | ||
const grammar = ohm.grammar(` | ||
WordsToNumbers { | ||
Number = Section* "point"? unit* | ||
Section = TenAndMagnitude | UnitAndMagnitude | TenUnitAndMagnitude | Unit | Ten | TenAndUnit | Magnitude | ||
Ten = ten ~unit ~magnitude | ||
TenAndUnit = ten unit ~magnitude | ||
TenAndMagnitude = ten ~unit magnitude | ||
UnitAndMagnitude = ~ten unit magnitude | ||
TenUnitAndMagnitude = ten unit magnitude | ||
Unit = ~ten unit ~magnitude | ||
Magnitude = ~ten ~unit magnitude | ||
ten = ${Object.keys(TEN).map(key => `"${key}" | `).join('').slice(0, -2)} | ||
unit = ${Object.keys(UNIT).map(key => `"${key}" | `).join('').slice(0, -2)} | ||
magnitude = ${Object.keys(MAGNITUDE).map(key => `"${key}" | `).join('').slice(0, -2)} | ||
} | ||
`); | ||
const numberWords = words | ||
.map(word => NUMBER_WORDS.includes(clean(word))); | ||
let pointReached = false; | ||
const reduced = numberWords | ||
.reduce((acc, isNumberWord, i) => { | ||
if (isNumberWord) { | ||
if (words[i] === 'point') pointReached = true; | ||
if (!itsSet(acc.start)) { | ||
acc.start = i; | ||
} | ||
else if ( | ||
Object.keys(PRIMARY_COUNT).includes(words[i - 1]) && | ||
Object.keys(PRIMARY_COUNT).includes(words[i]) && | ||
!pointReached | ||
) { | ||
acc.regions.push({start: acc.start, end: i - 1}); | ||
if (i === numWords - 1) { | ||
acc.regions.push({start: i, end: i}); | ||
} | ||
else { | ||
acc.start = i; | ||
} | ||
} | ||
const semantics = grammar | ||
.createSemantics() | ||
.addOperation('eval', { | ||
Number: (sections, point, decimal) => { | ||
const ints = sections.children.reduce((sum, child) => sum + child.eval(), 0); | ||
if (point.children.length) { | ||
const decimals = decimal.children | ||
.reduce((acc, d) => `${acc}${d.eval()}`, '') | ||
.replace(/\s/g, ''); | ||
return parseFloat(`${ints}.${decimals}`); | ||
} | ||
else if (itsSet(acc.start)) { | ||
acc.regions.push({start: acc.start, end: i - 1}); | ||
acc.start = null; | ||
} | ||
return acc; | ||
}, {regions: [], start: null}); | ||
return reduced.start === 0 && !reduced.regions.length ? 'whole' : reduced.regions; | ||
}; | ||
return ints; | ||
}, | ||
Ten: ten => ten.eval(), | ||
Unit: (unit) => unit.eval(), | ||
TenAndUnit: (ten, unit) => ten.eval() + unit.eval(), | ||
TenAndMagnitude: (ten, magnitude) => ten.eval() * magnitude.eval(), | ||
UnitAndMagnitude: (unit, magnitude) => unit.eval() * magnitude.eval(), | ||
TenUnitAndMagnitude: (ten, unit, magnitude) => | ||
(ten.eval() + unit.eval()) * magnitude.eval(), | ||
Magnitude: magnitude => magnitude.eval(), | ||
unit: (value) => UNIT[value.primitiveValue], | ||
ten: (value) => TEN[value.primitiveValue], | ||
magnitude: (value) => MAGNITUDE[value.primitiveValue], | ||
}); | ||
const convertWordsToDecimal = words => | ||
words.map(word => COUNT[word]) | ||
.join(''); | ||
const convertWordsToNonDecimal = words => { | ||
const reduced = words.reduce((acc, word) => { | ||
const cleanWord = clean(word); | ||
if (cleanWord === 'and') return acc; | ||
if (itsSet(acc.count)) { | ||
if (itsSet(COUNT[cleanWord])) { | ||
acc.extra += COUNT[acc.count]; | ||
acc.count = cleanWord; | ||
} | ||
else { | ||
acc.pairs.push({count: acc.count, magnitude: cleanWord}); | ||
acc.count = null; | ||
} | ||
} | ||
else { | ||
acc.count = cleanWord; | ||
} | ||
return acc; | ||
}, {pairs: [], count: null, extra: 0}); | ||
return reduced.pairs.reduce((acc, pair) => | ||
acc + COUNT[pair.count] * MAGNITUDE[pair.magnitude] | ||
, COUNT[reduced.count] || 0) + reduced.extra; | ||
}; | ||
const convertWordsToNumber = words => { | ||
const pointIndex = words.indexOf('point'); | ||
if (pointIndex > -1) { | ||
const numberWords = words.slice(0, pointIndex); | ||
const decimalWords = words.slice(pointIndex + 1); | ||
return parseFloat(`${convertWordsToNonDecimal(numberWords)}.${convertWordsToDecimal(decimalWords)}`); | ||
} | ||
return convertWordsToNonDecimal(words); | ||
}; | ||
// try to coerce a word into a NUMBER_WORD using fuzzy matching | ||
const fuzzyMatch = word => { | ||
@@ -182,25 +153,106 @@ return NUMBER_WORDS | ||
export function wordsToNumbers (text, options) { | ||
const opts = Object.assign({fuzzy: false}, options); | ||
let words = text.toString().split(/[\s-]+/); | ||
if (opts.fuzzy) words = words.map(word => fuzzyMatch(word)); | ||
const regions = extractNumberRegions(words); | ||
// Membership predicates for the three word classes (UNIT, TEN, MAGNITUDE).
// Each returns true only when `word` is a string that is an own key of the
// corresponding lookup table. A direct own-property test is used instead of
// building the key array and scanning it on every call.
const isUnit = word =>
  typeof word === 'string' && Object.prototype.hasOwnProperty.call(UNIT, word);
const isTen = word =>
  typeof word === 'string' && Object.prototype.hasOwnProperty.call(TEN, word);
const isMag = word =>
  typeof word === 'string' && Object.prototype.hasOwnProperty.call(MAGNITUDE, word);
if (regions === 'whole') return convertWordsToNumber(words); | ||
if (!regions.length) return null; | ||
const findRegions = (text, fuzzy) => { | ||
const words = text | ||
.split(/[ -]/g) | ||
.map(word => fuzzy ? fuzzyMatch(word) : word) | ||
.reduce((acc, word, i) => { | ||
const start = acc.length ? acc[i - 1].end + 1 : 0; | ||
return acc.concat({ | ||
text: word, | ||
start, | ||
end: start + word.length, | ||
}); | ||
}, []) | ||
.map(word => | ||
Object.assign({}, word, { | ||
isNumberWord: NUMBER_WORDS.indexOf( | ||
word.text.replace(PUNCTUATION, '').toLowerCase() | ||
) !== -1, | ||
}) | ||
); | ||
let removedWordsCount = 0; | ||
return regions.map(region => | ||
convertWordsToNumber(words.slice(region.start, region.end + 1)) | ||
) | ||
.reduce((acc, replacedRegion, i) => { | ||
const removeCount = regions[i].end - regions[i].start + 1; | ||
const result = acc.slice(0); | ||
result.splice(regions[i].start - removedWordsCount, removeCount, replacedRegion); | ||
removedWordsCount += removeCount - 1; | ||
return result; | ||
}, words) | ||
.join(' '); | ||
return words | ||
.reduce((regions, word, index) => { | ||
if (!word.isNumberWord) return regions; | ||
if (!regions.length) return [word]; | ||
if (word.text === 'point') { | ||
const newRegions = regions.slice(); | ||
newRegions[regions.length - 1].pointReached = true; | ||
newRegions[regions.length - 1].end = word.end; | ||
newRegions[regions.length - 1].text += ` ${word.text}`; | ||
return newRegions; | ||
} | ||
const prevRegion = regions[regions.length - 1]; | ||
const prevWord = words[index - 1] || ''; | ||
if ( | ||
prevRegion.end === word.start - 1 && | ||
!(isUnit(word.text) && isUnit(prevWord.text) || prevRegion.pointReached) && | ||
!(isTen(word.text) && isTen(prevWord.text)) && | ||
!(isMag(word.text) && isMag(prevWord.text)) && | ||
!(isTen(word.text) && isUnit(prevWord.text)) || | ||
(prevRegion.pointReached && isUnit(word.text)) || | ||
word === 'and' || | ||
prevWord === 'and' | ||
) { | ||
const newRegions = regions.slice(); | ||
newRegions[regions.length - 1].end = word.end; | ||
newRegions[regions.length - 1].text += ` ${word.text}`; | ||
return newRegions; | ||
} | ||
return regions.concat(word); | ||
}, []); | ||
}; | ||
/**
 * Evaluate the text of one number region.
 *
 * @param {string} text - the words of a single region, separated by spaces
 * @returns {number|string} the value produced by the grammar's `eval`
 *   operation when the text parses; otherwise the original `text`. A region
 *   consisting only of the helper word "a" or "and" is returned unchanged.
 */
const evaluateNumberRegion = text => {
  // Number words are detected case-insensitively, but the grammar's literals
  // are lowercase, so normalise the case before comparing or matching.
  const normalized = text.toLowerCase();
  if (normalized === 'a' || normalized === 'and') return text;

  // Punctuation becomes whitespace and "and" is dropped before parsing.
  const m = grammar.match(normalized.replace(PUNCTUATION, ' ').replace(/\band\b/g, ''));
  if (m.succeeded()) {
    return semantics(m).eval();
  }
  // Parse failure: report why and leave the region text untouched.
  console.log(m.message);
  return text;
};
/**
 * String counterpart of an array splice: remove `count` characters starting
 * at `index` and insert `add` in their place.
 *
 * @param {string} str - the string to edit
 * @param {number} index - start position; a negative value counts back from
 *   the end of `str` and is clamped to 0 if it reaches past the beginning
 * @param {number} count - number of characters to remove
 * @param {string} [add] - text to insert; a falsy value inserts nothing
 * @returns {string} the edited string
 */
function splice (str, index, count, add) {
  let offset = index;
  if (offset < 0) {
    const fromEnd = str.length + offset;
    offset = fromEnd < 0 ? 0 : fromEnd;
  }
  const head = str.slice(0, offset);
  const tail = str.slice(offset + count);
  return head + (add || '') + tail;
}
/**
 * Replace all number words in a string with actual numerals.
 * If the string contains several separate numbers, each one is replaced
 * individually.
 *
 * @param {string} text - the input text
 * @param {Object} [options]
 * @param {boolean} [options.fuzzy=false] - when true, try to coerce words
 *   into number words (fuzzy matching) before converting them
 * @returns {number|string} `text` unchanged when it contains no number
 *   words; the evaluated region when one region spans every word of the
 *   input; otherwise `text` with each region replaced by its evaluation
 */
export function wordsToNumbers (text, options) {
  const opts = Object.assign({fuzzy: false}, options);
  const regions = findRegions(text, opts.fuzzy);
  if (!regions.length) return text;

  // The input is "wholly a number" only when the single region accounts for
  // every word in it. Comparing the region's end with its own text length
  // cannot detect trailing non-number words (e.g. "one xxx"), so compare
  // word counts against the input instead. Empty tokens produced by extra
  // separators are ignored.
  const first = regions[0];
  const inputWordCount = text.split(/[ -]/g).filter(Boolean).length;
  const coversWholeInput =
    regions.length === 1 &&
    first.start === 0 &&
    first.text.split(' ').length === inputWordCount;
  if (coversWholeInput) {
    return evaluateNumberRegion(first.text);
  }

  // Evaluate regions in reading order, then substitute from the last region
  // to the first so that the offsets of earlier regions stay valid.
  const numbers = regions.map(region => evaluateNumberRegion(region.text));
  return regions.reduceRight(
    (acc, region, i) => splice(
      acc,
      region.start,
      region.end - region.start,
      `${numbers[i]}`
    ),
    text
  );
}
export default wordsToNumbers; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: This package has recently undergone a major refactor. It may be unstable, or the refactor may indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
40788
14
1058
1