words-to-numbers
Advanced tools
Comparing version 1.4.1 to 1.5.0
@@ -7,2 +7,10 @@ 'use strict'; | ||
var _typeof2 = require('babel-runtime/helpers/typeof'); | ||
var _typeof3 = _interopRequireDefault(_typeof2); | ||
var _extends2 = require('babel-runtime/helpers/extends'); | ||
var _extends3 = _interopRequireDefault(_extends2); | ||
var _util = require('./util'); | ||
@@ -12,17 +20,59 @@ | ||
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } | ||
var getNumber = function getNumber(region) { | ||
var sum = 0; | ||
region.subRegions.forEach(function (_ref) { | ||
var tokens = _ref.tokens; | ||
var type = _ref.type; | ||
var decimalReached = false; | ||
var decimalUnits = []; | ||
region.subRegions.forEach(function (subRegion) { | ||
var tokens = subRegion.tokens; | ||
var type = subRegion.type; | ||
var subRegionSum = 0; | ||
if (type === _constants.TOKEN_TYPE.DECIMAL) { | ||
decimalReached = true; | ||
return; | ||
} | ||
if (decimalReached) { | ||
decimalUnits.push(subRegion); | ||
return; | ||
} | ||
switch (type) { | ||
case _constants.TOKEN_TYPE.MAGNITUDE: | ||
case _constants.TOKEN_TYPE.HUNDRED: | ||
{ | ||
subRegionSum = 1; | ||
tokens.forEach(function (token) { | ||
subRegionSum *= _constants.NUMBER[token.lowerCaseValue]; | ||
}); | ||
break; | ||
var _ret = function () { | ||
subRegionSum = 1; | ||
var tokensCount = tokens.length; | ||
tokens.reduce(function (acc, token, i) { | ||
if (token.type === _constants.TOKEN_TYPE.HUNDRED) { | ||
var _ret2 = function () { | ||
var tokensToAdd = tokensCount - 1 ? tokens.slice(i + 1) : []; | ||
tokensToAdd = tokensToAdd.filter(function (tokenToAdd, j) { | ||
return j === 0 || tokensToAdd[j - 1].type > tokenToAdd.type; | ||
}); | ||
var tokensToAddSum = tokensToAdd.reduce(function (acc2, tokenToAdd) { | ||
return acc2 + _constants.NUMBER[tokenToAdd.lowerCaseValue]; | ||
}, 0); | ||
return { | ||
v: acc.concat((0, _extends3.default)({}, tokens[i + 1], { | ||
numberValue: tokensToAddSum + _constants.NUMBER[token.lowerCaseValue] * 100 | ||
})) | ||
}; | ||
}(); | ||
if ((typeof _ret2 === 'undefined' ? 'undefined' : (0, _typeof3.default)(_ret2)) === "object") return _ret2.v; | ||
} | ||
if (i > 0 && tokens[i - 1].type === _constants.TOKEN_TYPE.HUNDRED) return acc; | ||
if (i > 1 && tokens[i - 1].type === _constants.TOKEN_TYPE.TEN && tokens[i - 2].type === _constants.TOKEN_TYPE.HUNDRED) return acc; | ||
return acc.concat({ token: token, numberValue: _constants.NUMBER[token.lowerCaseValue] }); | ||
}, []).forEach(function (_ref) { | ||
var numberValue = _ref.numberValue; | ||
subRegionSum *= numberValue; | ||
}); | ||
return 'break'; | ||
}(); | ||
if (_ret === 'break') break; | ||
} | ||
@@ -37,11 +87,2 @@ case _constants.TOKEN_TYPE.UNIT: | ||
} | ||
case _constants.TOKEN_TYPE.DECIMAL: | ||
{ | ||
tokens.forEach(function (token, i) { | ||
if (!_constants.DECIMALS.includes(token.lowerCaseValue)) { | ||
subRegionSum += _constants.NUMBER[token.lowerCaseValue] / Math.pow(10, i); | ||
} | ||
}); | ||
break; | ||
} | ||
// no default | ||
@@ -51,2 +92,15 @@ } | ||
}); | ||
var currentDecimalPlace = 1; | ||
decimalUnits.forEach(function (_ref2) { | ||
var tokens = _ref2.tokens; | ||
tokens.forEach(function (_ref3) { | ||
var lowerCaseValue = _ref3.lowerCaseValue; | ||
sum += _constants.NUMBER[lowerCaseValue] / Math.pow(10, currentDecimalPlace); | ||
currentDecimalPlace += 1; | ||
}); | ||
}); | ||
return sum; | ||
@@ -60,5 +114,5 @@ }; | ||
var length = region.end - region.start + 1; | ||
var replaceWith = getNumber(region); | ||
var replaceWith = '' + getNumber(region); | ||
replaced = (0, _util.splice)(replaced, region.start + offset, length, replaceWith); | ||
offset -= length - ('' + replaceWith).length; | ||
offset -= length - replaceWith.length; | ||
}); | ||
@@ -68,5 +122,5 @@ return replaced; | ||
exports.default = function (_ref2) { | ||
var regions = _ref2.regions; | ||
var text = _ref2.text; | ||
exports.default = function (_ref4) { | ||
var regions = _ref4.regions; | ||
var text = _ref4.text; | ||
@@ -73,0 +127,0 @@ if (!regions) return text; |
@@ -117,3 +117,4 @@ 'use strict'; | ||
MAGNITUDE: 2, | ||
DECIMAL: 5 | ||
DECIMAL: 3, | ||
HUNDRED: 4 | ||
}; | ||
@@ -120,0 +121,0 @@ |
@@ -23,3 +23,3 @@ 'use strict'; | ||
if (!regions.length) return text; | ||
var compiled = (0, _compiler2.default)({ text: text, regions: regions }, options); | ||
var compiled = (0, _compiler2.default)({ text: text, regions: regions }); | ||
return compiled; | ||
@@ -26,0 +26,0 @@ } |
@@ -7,6 +7,2 @@ 'use strict'; | ||
var _toConsumableArray2 = require('babel-runtime/helpers/toConsumableArray'); | ||
var _toConsumableArray3 = _interopRequireDefault(_toConsumableArray2); | ||
var _extends2 = require('babel-runtime/helpers/extends'); | ||
@@ -24,2 +20,3 @@ | ||
/* eslint-disable no-extra-parens */ | ||
var SKIP = 0; | ||
@@ -30,12 +27,17 @@ var ADD = 1; | ||
var canAddTokenToEndOfSubRegion = function canAddTokenToEndOfSubRegion(subRegion, token) { | ||
var canAddTokenToEndOfSubRegion = function canAddTokenToEndOfSubRegion(subRegion, currentToken, _ref) { | ||
var impliedHundreds = _ref.impliedHundreds; | ||
var tokens = subRegion.tokens; | ||
var type = token.type; | ||
var prevToken = tokens[0]; | ||
if (!prevToken) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.MAGNITUDE && type === _constants.TOKEN_TYPE.UNIT) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.MAGNITUDE && type === _constants.TOKEN_TYPE.TEN) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.TEN && type === _constants.TOKEN_TYPE.UNIT) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.MAGNITUDE && type === _constants.TOKEN_TYPE.MAGNITUDE) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.MAGNITUDE && currentToken.type === _constants.TOKEN_TYPE.UNIT) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.MAGNITUDE && currentToken.type === _constants.TOKEN_TYPE.TEN) return true; | ||
if (impliedHundreds && subRegion.type === _constants.TOKEN_TYPE.MAGNITUDE && prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.UNIT) return true; | ||
if (impliedHundreds && subRegion.type === _constants.TOKEN_TYPE.MAGNITUDE && prevToken.type === _constants.TOKEN_TYPE.UNIT && currentToken.type === _constants.TOKEN_TYPE.TEN) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.UNIT) return true; | ||
if (!impliedHundreds && prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.UNIT) return true; | ||
if (prevToken.type === _constants.TOKEN_TYPE.MAGNITUDE && currentToken.type === _constants.TOKEN_TYPE.MAGNITUDE) return true; | ||
if (!impliedHundreds && prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.TEN) return false; | ||
if (impliedHundreds && prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.TEN) return true; | ||
return false; | ||
@@ -46,17 +48,25 @@ }; | ||
if (!subRegion) { | ||
return currentToken.type; | ||
return { type: currentToken.type }; | ||
} | ||
if (subRegion.type === _constants.TOKEN_TYPE.MAGNITUDE) return _constants.TOKEN_TYPE.MAGNITUDE; | ||
return currentToken.type; | ||
var prevToken = subRegion.tokens[0]; | ||
var isHundred = prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.UNIT || prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.TEN || prevToken.type === _constants.TOKEN_TYPE.UNIT && currentToken.type === _constants.TOKEN_TYPE.TEN && _constants.NUMBER[prevToken.lowerCaseValue] > 9 || prevToken.type === _constants.TOKEN_TYPE.UNIT && currentToken.type === _constants.TOKEN_TYPE.UNIT || prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.UNIT && subRegion.type === _constants.TOKEN_TYPE.MAGNITUDE; | ||
if (subRegion.type === _constants.TOKEN_TYPE.MAGNITUDE) return { type: _constants.TOKEN_TYPE.MAGNITUDE, isHundred: isHundred }; | ||
if (isHundred) return { type: _constants.TOKEN_TYPE.HUNDRED, isHundred: isHundred }; | ||
return { type: currentToken.type, isHundred: isHundred }; | ||
}; | ||
var checkIfTokenFitsSubRegion = function checkIfTokenFitsSubRegion(subRegion, token) { | ||
if (!subRegion) return { action: START_NEW_REGION, type: getSubRegionType(subRegion, token) }; | ||
if (canAddTokenToEndOfSubRegion(subRegion, token)) { | ||
return { action: ADD, type: getSubRegionType(subRegion, token) }; | ||
var checkIfTokenFitsSubRegion = function checkIfTokenFitsSubRegion(subRegion, token, options) { | ||
var _getSubRegionType = getSubRegionType(subRegion, token); | ||
var type = _getSubRegionType.type; | ||
var isHundred = _getSubRegionType.isHundred; | ||
if (!subRegion) return { action: START_NEW_REGION, type: type, isHundred: isHundred }; | ||
if (canAddTokenToEndOfSubRegion(subRegion, token, options)) { | ||
return { action: ADD, type: type, isHundred: isHundred }; | ||
} | ||
return { action: START_NEW_REGION, type: getSubRegionType(subRegion, token) }; | ||
return { action: START_NEW_REGION, type: type, isHundred: isHundred }; | ||
}; | ||
var getSubRegions = function getSubRegions(region) { | ||
var getSubRegions = function getSubRegions(region, options) { | ||
var subRegions = []; | ||
@@ -69,7 +79,9 @@ var currentSubRegion = void 0; | ||
var _checkIfTokenFitsSubR = checkIfTokenFitsSubRegion(currentSubRegion, token); | ||
var _checkIfTokenFitsSubR = checkIfTokenFitsSubRegion(currentSubRegion, token, options); | ||
var action = _checkIfTokenFitsSubR.action; | ||
var type = _checkIfTokenFitsSubR.type; | ||
var isHundred = _checkIfTokenFitsSubR.isHundred; | ||
token.type = isHundred ? _constants.TOKEN_TYPE.HUNDRED : token.type; | ||
switch (action) { | ||
@@ -98,14 +110,18 @@ case ADD: | ||
var canAddTokenToEndOfRegion = function canAddTokenToEndOfRegion(region, token) { | ||
var canAddTokenToEndOfRegion = function canAddTokenToEndOfRegion(region, currentToken, _ref2) { | ||
var impliedHundreds = _ref2.impliedHundreds; | ||
var tokens = region.tokens; | ||
var type = token.type; | ||
var prevToken = tokens[tokens.length - 1]; | ||
if (prevToken.type === _constants.TOKEN_TYPE.UNIT && type === _constants.TOKEN_TYPE.UNIT) return false; | ||
if (prevToken.type === _constants.TOKEN_TYPE.UNIT && type === _constants.TOKEN_TYPE.TEN) return false; | ||
if (prevToken.type === _constants.TOKEN_TYPE.TEN && type === _constants.TOKEN_TYPE.TEN) return false; | ||
if (!impliedHundreds && prevToken.type === _constants.TOKEN_TYPE.UNIT && currentToken.type === _constants.TOKEN_TYPE.UNIT && !region.hasDecimal) return false; | ||
if (!impliedHundreds && prevToken.type === _constants.TOKEN_TYPE.UNIT && currentToken.type === _constants.TOKEN_TYPE.TEN) return false; | ||
if (!impliedHundreds && prevToken.type === _constants.TOKEN_TYPE.TEN && currentToken.type === _constants.TOKEN_TYPE.TEN) return false; | ||
return true; | ||
}; | ||
var checkIfTokenFitsRegion = function checkIfTokenFitsRegion(region, token) { | ||
var checkIfTokenFitsRegion = function checkIfTokenFitsRegion(region, token, options) { | ||
var isDecimal = _constants.DECIMALS.includes(token.lowerCaseValue); | ||
if ((!region || !region.tokens.length) && isDecimal) { | ||
return START_NEW_REGION; | ||
} | ||
var isPunctuation = _constants.PUNCTUATION.includes(token.lowerCaseValue); | ||
@@ -115,8 +131,9 @@ if (isPunctuation) return SKIP; | ||
if (isJoiner) return SKIP; | ||
var isDecimal = _constants.DECIMALS.includes(token.lowerCaseValue); | ||
if (isDecimal) return ADD; | ||
if (isDecimal && !region.hasDecimal) { | ||
return ADD; | ||
} | ||
var isNumberWord = _constants.NUMBER_WORDS.includes(token.lowerCaseValue); | ||
if (isNumberWord) { | ||
if (!region) return START_NEW_REGION; | ||
if (canAddTokenToEndOfRegion(region, token)) { | ||
if (canAddTokenToEndOfRegion(region, token, options)) { | ||
return ADD; | ||
@@ -129,26 +146,2 @@ } | ||
var regionIsValid = function regionIsValid(region) { | ||
if (region.tokens.length === 1) { | ||
if (['a'].includes(region.tokens[0].lowerCaseValue)) return false; | ||
} | ||
return true; | ||
}; | ||
var getDecimalTokenIndex = function getDecimalTokenIndex(tokens) { | ||
return tokens.reduce(function (acc, token, i) { | ||
return _constants.DECIMALS.includes(token.lowerCaseValue) ? i : acc; | ||
}, -1); | ||
}; | ||
var getDecimalSubRegion = function getDecimalSubRegion(tokens) { | ||
return { | ||
start: tokens[0].start, | ||
end: tokens[tokens.length - 1].end, | ||
tokens: tokens.reduce(function (acc, token) { | ||
return _constants.NUMBER_WORDS.concat(_constants.DECIMALS).includes(token.lowerCaseValue) ? acc.concat(token) : acc; | ||
}, []), | ||
type: _constants.TOKEN_TYPE.DECIMAL | ||
}; | ||
}; | ||
var checkBlacklist = function checkBlacklist(tokens) { | ||
@@ -158,3 +151,3 @@ return tokens.length === 1 && _constants.BLACKLIST_SINGULAR_WORDS.includes(tokens[0].lowerCaseValue); | ||
var matchRegions = function matchRegions(tokens) { | ||
var matchRegions = function matchRegions(tokens, options) { | ||
var regions = []; | ||
@@ -167,6 +160,5 @@ | ||
var tokensCount = tokens.length; | ||
var decimalIndex = getDecimalTokenIndex(tokens); | ||
while (i < (decimalIndex === -1 ? tokensCount : decimalIndex)) { | ||
while (i < tokensCount) { | ||
var token = tokens[i]; | ||
var tokenFits = checkIfTokenFitsRegion(currentRegion, token); | ||
var tokenFits = checkIfTokenFitsRegion(currentRegion, token, options); | ||
switch (tokenFits) { | ||
@@ -179,4 +171,9 @@ case SKIP: | ||
{ | ||
currentRegion.end = token.end; | ||
currentRegion.tokens.push(token); | ||
if (currentRegion) { | ||
currentRegion.end = token.end; | ||
currentRegion.tokens.push(token); | ||
if (token.type === _constants.TOKEN_TYPE.DECIMAL) { | ||
currentRegion.hasDecimal = true; | ||
} | ||
} | ||
break; | ||
@@ -192,2 +189,5 @@ } | ||
regions.push(currentRegion); | ||
if (token.type === _constants.TOKEN_TYPE.DECIMAL) { | ||
currentRegion.hasDecimal = true; | ||
} | ||
break; | ||
@@ -204,17 +204,6 @@ } | ||
} | ||
return regions.reduce(function (acc, region) { | ||
var decimalRegion = decimalIndex !== -1 ? getDecimalSubRegion(tokens.slice(decimalIndex)) : { tokens: [] }; | ||
var subRegions = getSubRegions(region); | ||
if (decimalRegion.tokens.length) { | ||
subRegions = subRegions.concat(decimalRegion); | ||
} | ||
var regionWithDecimal = (0, _extends3.default)({}, region, { | ||
subRegions: subRegions | ||
}); | ||
regionWithDecimal.tokens = [].concat((0, _toConsumableArray3.default)(regionWithDecimal.tokens), (0, _toConsumableArray3.default)(decimalRegion.tokens)); | ||
if (regionWithDecimal.tokens.length) { | ||
regionWithDecimal.end = regionWithDecimal.tokens[regionWithDecimal.tokens.length - 1].end; | ||
} | ||
return regionIsValid(decimalRegion) ? acc.concat(regionWithDecimal) : acc; | ||
}, []); | ||
return regions.map(function (region) { | ||
return (0, _extends3.default)({}, region, { subRegions: getSubRegions(region, options) }); | ||
}); | ||
}; | ||
@@ -221,0 +210,0 @@ |
{ | ||
"name": "words-to-numbers", | ||
"description": "convert textual words to numbers with optional fuzzy text matching", | ||
"version": "1.4.1", | ||
"version": "1.5.0", | ||
"license": "MIT", | ||
@@ -33,3 +33,3 @@ "options": { | ||
}, | ||
"author": "Finn Fitzsimons <>", | ||
"author": "Finn Fitzsimons <hello@finn.works>", | ||
"bugs": { | ||
@@ -36,0 +36,0 @@ "url": "https://github.com/finnfiddle/words-to-numbers/issues" |
@@ -10,2 +10,6 @@ /* eslint-disable no-unused-expressions */ | ||
it('one hundred two', () => { | ||
expect(wtn('one hundred two')).to.equal(102); | ||
}); | ||
it('one hundred and five', () => { | ||
@@ -218,1 +222,103 @@ expect(wtn('one hundred and five')).to.equal(105); | ||
}); | ||
it('Model Fifty-One Fifty-Six', () => { | ||
expect(wtn('Model Fifty-One Fifty-Six')).to.eq('Model 51 56'); | ||
}); | ||
it('Fifty Million Frenchmen', () => { | ||
expect(wtn('Fifty Million Frenchmen')).to.eq('50000000 Frenchmen'); | ||
}); | ||
it('A Thousand and One Wives', () => { | ||
expect(wtn('A Thousand and One Wives')).to.eq('1001 Wives'); | ||
}); | ||
it('Ten Thousand Pictures of You', () => { | ||
expect(wtn('Ten Thousand Pictures of You')).to.eq('10000 Pictures of You'); | ||
}); | ||
it('nineteen eighty four', () => { | ||
expect(wtn('nineteen eighty four', { impliedHundreds: true })).to.eq(1984); | ||
}); | ||
it('one thirty', () => { | ||
expect(wtn('one thirty', { impliedHundreds: true })).to.eq(130); | ||
}); | ||
it('six sixty two', () => { | ||
expect(wtn('six sixty two', { impliedHundreds: true })).to.eq(662); | ||
}); | ||
it('ten twelve', () => { | ||
expect(wtn('ten twelve', { impliedHundreds: true })).to.eq(1012); | ||
}); | ||
it('nineteen ten', () => { | ||
expect(wtn('nineteen ten', { impliedHundreds: true })).to.eq(1910); | ||
}); | ||
it('twenty ten', () => { | ||
expect(wtn('twenty ten', { impliedHundreds: true })).to.eq(2010); | ||
}); | ||
it('twenty seventeen', () => { | ||
expect(wtn('twenty seventeen', { impliedHundreds: true })).to.eq(2017); | ||
}); | ||
it('twenty twenty', () => { | ||
expect(wtn('twenty twenty', { impliedHundreds: true })).to.eq(2020); | ||
}); | ||
it('twenty twenty one', () => { | ||
expect(wtn('twenty twenty one', { impliedHundreds: true })).to.eq(2021); | ||
}); | ||
it('fifty sixty three', () => { | ||
expect(wtn('fifty sixty three', { impliedHundreds: true })).to.eq(5063); | ||
}); | ||
it('fifty sixty', () => { | ||
expect(wtn('fifty sixty', { impliedHundreds: true })).to.eq(5060); | ||
}); | ||
it('three thousand', () => { | ||
expect(wtn('three thousand', { impliedHundreds: true })).to.eq(3000); | ||
}); | ||
it('fifty sixty three thousand', () => { | ||
expect(wtn('fifty sixty three thousand', { impliedHundreds: true })).to.eq(5063000); | ||
}); | ||
it('one hundred thousand', () => { | ||
expect(wtn('one hundred thousand')).to.eq(100000); | ||
}); | ||
it('I have zero apples and four oranges', () => { | ||
expect(wtn('I have zero apples and four oranges')).to.eq('I have 0 apples and 4 oranges'); | ||
}); | ||
it('Dot two Dot', () => { | ||
expect(wtn('Dot two Dot')).to.eq('0.2 Dot'); | ||
}); | ||
it('seventeen dot two four dot twelve dot five', () => { | ||
expect(wtn('seventeen dot two four dot twelve dot five')).to.eq('17.24 dot 12.5'); | ||
}); | ||
// these dont work below fml | ||
// it('one thirty thousand', () => { | ||
// expect(wtn('one thirty thousand', { impliedHundreds: true })).to.eq(130000); | ||
// }); | ||
// it('nineteen eighty thousand', () => { | ||
// expect(wtn('nineteen eighty thousand', { impliedHundreds: true })).to.eq('19 80000'); | ||
// }); | ||
// it('one hundred two thousand', () => { | ||
// expect(wtn('one hundred two thousand')).to.eq(102000); | ||
// }); | ||
// it('one hundred and two thousand', () => { | ||
// expect(wtn('one hundred and two thousand')).to.eq(102000); | ||
// }); |
import { splice } from './util'; | ||
import { TOKEN_TYPE, NUMBER, DECIMALS } from './constants'; | ||
import { TOKEN_TYPE, NUMBER } from './constants'; | ||
const getNumber = region => { | ||
let sum = 0; | ||
region.subRegions.forEach(({ tokens, type }) => { | ||
let decimalReached = false; | ||
let decimalUnits = []; | ||
region.subRegions.forEach((subRegion) => { | ||
const { tokens, type } = subRegion; | ||
let subRegionSum = 0; | ||
if (type === TOKEN_TYPE.DECIMAL) { | ||
decimalReached = true; | ||
return; | ||
} | ||
if (decimalReached) { | ||
decimalUnits.push(subRegion); | ||
return; | ||
} | ||
switch (type) { | ||
case TOKEN_TYPE.MAGNITUDE: { | ||
case TOKEN_TYPE.MAGNITUDE: | ||
case TOKEN_TYPE.HUNDRED: { | ||
subRegionSum = 1; | ||
tokens.forEach(token => { | ||
subRegionSum *= NUMBER[token.lowerCaseValue]; | ||
const tokensCount = tokens.length; | ||
tokens.reduce((acc, token, i) => { | ||
if (token.type === TOKEN_TYPE.HUNDRED) { | ||
let tokensToAdd = tokensCount - 1 ? tokens.slice(i + 1) : []; | ||
tokensToAdd = tokensToAdd.filter((tokenToAdd, j) => | ||
j === 0 || tokensToAdd[j - 1].type > tokenToAdd.type | ||
); | ||
const tokensToAddSum = tokensToAdd.reduce((acc2, tokenToAdd) => | ||
acc2 + NUMBER[tokenToAdd.lowerCaseValue] | ||
, 0); | ||
return acc.concat({ | ||
...tokens[i + 1], | ||
numberValue: tokensToAddSum + (NUMBER[token.lowerCaseValue] * 100), | ||
}); | ||
} | ||
if (i > 0 && tokens[i - 1].type === TOKEN_TYPE.HUNDRED) return acc; | ||
if ( | ||
i > 1 && | ||
tokens[i - 1].type === TOKEN_TYPE.TEN && | ||
tokens[i - 2].type === TOKEN_TYPE.HUNDRED | ||
) return acc; | ||
return acc.concat({ token, numberValue: NUMBER[token.lowerCaseValue] }); | ||
}, []).forEach(({ numberValue }) => { | ||
subRegionSum *= numberValue; | ||
}); | ||
@@ -23,10 +57,2 @@ break; | ||
} | ||
case TOKEN_TYPE.DECIMAL: { | ||
tokens.forEach((token, i) => { | ||
if (!DECIMALS.includes(token.lowerCaseValue)) { | ||
subRegionSum += NUMBER[token.lowerCaseValue] / Math.pow(10, i); | ||
} | ||
}); | ||
break; | ||
} | ||
// no default | ||
@@ -36,2 +62,11 @@ } | ||
}); | ||
let currentDecimalPlace = 1; | ||
decimalUnits.forEach(({ tokens }) => { | ||
tokens.forEach(({ lowerCaseValue }) => { | ||
sum += NUMBER[lowerCaseValue] / Math.pow(10, currentDecimalPlace); | ||
currentDecimalPlace += 1; | ||
}); | ||
}); | ||
return sum; | ||
@@ -45,5 +80,5 @@ }; | ||
const length = region.end - region.start + 1; | ||
const replaceWith = getNumber(region); | ||
const replaceWith = `${getNumber(region)}`; | ||
replaced = splice(replaced, region.start + offset, length, replaceWith); | ||
offset -= length - `${replaceWith}`.length; | ||
offset -= length - replaceWith.length; | ||
}); | ||
@@ -50,0 +85,0 @@ return replaced; |
@@ -121,3 +121,4 @@ | ||
MAGNITUDE: 2, | ||
DECIMAL: 5, | ||
DECIMAL: 3, | ||
HUNDRED: 4, | ||
}; | ||
@@ -124,0 +125,0 @@ |
@@ -7,3 +7,3 @@ import parser from './parser'; | ||
if (!regions.length) return text; | ||
const compiled = compiler({ text, regions }, options); | ||
const compiled = compiler({ text, regions }); | ||
return compiled; | ||
@@ -10,0 +10,0 @@ } |
@@ -0,1 +1,2 @@ | ||
/* eslint-disable no-extra-parens */ | ||
import { | ||
@@ -11,2 +12,3 @@ PUNCTUATION, | ||
BLACKLIST_SINGULAR_WORDS, | ||
NUMBER, | ||
} from './constants'; | ||
@@ -20,11 +22,49 @@ import fuzzyMatch from './fuzzy'; | ||
const canAddTokenToEndOfSubRegion = (subRegion, token) => { | ||
const canAddTokenToEndOfSubRegion = (subRegion, currentToken, { impliedHundreds }) => { | ||
const { tokens } = subRegion; | ||
const { type } = token; | ||
const prevToken = tokens[0]; | ||
if (!prevToken) return true; | ||
if (prevToken.type === TOKEN_TYPE.MAGNITUDE && type === TOKEN_TYPE.UNIT) return true; | ||
if (prevToken.type === TOKEN_TYPE.MAGNITUDE && type === TOKEN_TYPE.TEN) return true; | ||
if (prevToken.type === TOKEN_TYPE.TEN && type === TOKEN_TYPE.UNIT) return true; | ||
if (prevToken.type === TOKEN_TYPE.MAGNITUDE && type === TOKEN_TYPE.MAGNITUDE) return true; | ||
if ( | ||
prevToken.type === TOKEN_TYPE.MAGNITUDE && | ||
currentToken.type === TOKEN_TYPE.UNIT | ||
) return true; | ||
if ( | ||
prevToken.type === TOKEN_TYPE.MAGNITUDE && | ||
currentToken.type === TOKEN_TYPE.TEN | ||
) return true; | ||
if ( | ||
impliedHundreds && | ||
subRegion.type === TOKEN_TYPE.MAGNITUDE && | ||
prevToken.type === TOKEN_TYPE.TEN && | ||
currentToken.type === TOKEN_TYPE.UNIT | ||
) return true; | ||
if ( | ||
impliedHundreds && | ||
subRegion.type === TOKEN_TYPE.MAGNITUDE && | ||
prevToken.type === TOKEN_TYPE.UNIT && | ||
currentToken.type === TOKEN_TYPE.TEN | ||
) return true; | ||
if ( | ||
prevToken.type === TOKEN_TYPE.TEN && | ||
currentToken.type === TOKEN_TYPE.UNIT | ||
) return true; | ||
if ( | ||
!impliedHundreds && | ||
prevToken.type === TOKEN_TYPE.TEN && | ||
currentToken.type === TOKEN_TYPE.UNIT | ||
) return true; | ||
if ( | ||
prevToken.type === TOKEN_TYPE.MAGNITUDE && | ||
currentToken.type === TOKEN_TYPE.MAGNITUDE | ||
) return true; | ||
if ( | ||
!impliedHundreds && | ||
prevToken.type === TOKEN_TYPE.TEN && | ||
currentToken.type === TOKEN_TYPE.TEN | ||
) return false; | ||
if ( | ||
impliedHundreds && | ||
prevToken.type === TOKEN_TYPE.TEN && | ||
currentToken.type === TOKEN_TYPE.TEN | ||
) return true; | ||
return false; | ||
@@ -35,17 +75,30 @@ }; | ||
if (!subRegion) { | ||
return currentToken.type; | ||
return { type: currentToken.type }; | ||
} | ||
if (subRegion.type === TOKEN_TYPE.MAGNITUDE) return TOKEN_TYPE.MAGNITUDE; | ||
return currentToken.type; | ||
const prevToken = subRegion.tokens[0]; | ||
const isHundred = ( | ||
(prevToken.type === TOKEN_TYPE.TEN && currentToken.type === TOKEN_TYPE.UNIT) || | ||
(prevToken.type === TOKEN_TYPE.TEN && currentToken.type === TOKEN_TYPE.TEN) || | ||
( | ||
prevToken.type === TOKEN_TYPE.UNIT && currentToken.type === TOKEN_TYPE.TEN && | ||
NUMBER[prevToken.lowerCaseValue] > 9 | ||
) || | ||
(prevToken.type === TOKEN_TYPE.UNIT && currentToken.type === TOKEN_TYPE.UNIT) || | ||
(prevToken.type === TOKEN_TYPE.TEN && currentToken.type === TOKEN_TYPE.UNIT && subRegion.type === TOKEN_TYPE.MAGNITUDE) | ||
); | ||
if (subRegion.type === TOKEN_TYPE.MAGNITUDE) return { type: TOKEN_TYPE.MAGNITUDE, isHundred }; | ||
if (isHundred) return { type: TOKEN_TYPE.HUNDRED, isHundred }; | ||
return { type: currentToken.type, isHundred }; | ||
}; | ||
const checkIfTokenFitsSubRegion = (subRegion, token) => { | ||
if (!subRegion) return { action: START_NEW_REGION, type: getSubRegionType(subRegion, token) }; | ||
if (canAddTokenToEndOfSubRegion(subRegion, token)) { | ||
return { action: ADD, type: getSubRegionType(subRegion, token) }; | ||
const checkIfTokenFitsSubRegion = (subRegion, token, options) => { | ||
const { type, isHundred } = getSubRegionType(subRegion, token); | ||
if (!subRegion) return { action: START_NEW_REGION, type, isHundred }; | ||
if (canAddTokenToEndOfSubRegion(subRegion, token, options)) { | ||
return { action: ADD, type, isHundred }; | ||
} | ||
return { action: START_NEW_REGION, type: getSubRegionType(subRegion, token) }; | ||
return { action: START_NEW_REGION, type, isHundred }; | ||
}; | ||
const getSubRegions = region => { | ||
const getSubRegions = (region, options) => { | ||
const subRegions = []; | ||
@@ -57,3 +110,4 @@ let currentSubRegion; | ||
const token = region.tokens[i]; | ||
const { action, type } = checkIfTokenFitsSubRegion(currentSubRegion, token); | ||
const { action, type, isHundred } = checkIfTokenFitsSubRegion(currentSubRegion, token, options); | ||
token.type = isHundred ? TOKEN_TYPE.HUNDRED : token.type; | ||
switch (action) { | ||
@@ -80,13 +134,21 @@ case ADD: { | ||
const canAddTokenToEndOfRegion = (region, token) => { | ||
const canAddTokenToEndOfRegion = (region, currentToken, { impliedHundreds }) => { | ||
const { tokens } = region; | ||
const { type } = token; | ||
const prevToken = tokens[tokens.length - 1]; | ||
if (prevToken.type === TOKEN_TYPE.UNIT && type === TOKEN_TYPE.UNIT) return false; | ||
if (prevToken.type === TOKEN_TYPE.UNIT && type === TOKEN_TYPE.TEN) return false; | ||
if (prevToken.type === TOKEN_TYPE.TEN && type === TOKEN_TYPE.TEN) return false; | ||
if ( | ||
!impliedHundreds && | ||
prevToken.type === TOKEN_TYPE.UNIT && | ||
currentToken.type === TOKEN_TYPE.UNIT && | ||
!region.hasDecimal | ||
) return false; | ||
if (!impliedHundreds && prevToken.type === TOKEN_TYPE.UNIT && currentToken.type === TOKEN_TYPE.TEN) return false; | ||
if (!impliedHundreds && prevToken.type === TOKEN_TYPE.TEN && currentToken.type === TOKEN_TYPE.TEN) return false; | ||
return true; | ||
}; | ||
const checkIfTokenFitsRegion = (region, token) => { | ||
const checkIfTokenFitsRegion = (region, token, options) => { | ||
const isDecimal = DECIMALS.includes(token.lowerCaseValue); | ||
if ((!region || !region.tokens.length) && isDecimal) { | ||
return START_NEW_REGION; | ||
} | ||
const isPunctuation = PUNCTUATION.includes(token.lowerCaseValue); | ||
@@ -96,8 +158,9 @@ if (isPunctuation) return SKIP; | ||
if (isJoiner) return SKIP; | ||
const isDecimal = DECIMALS.includes(token.lowerCaseValue); | ||
if (isDecimal) return ADD; | ||
if (isDecimal && !region.hasDecimal) { | ||
return ADD; | ||
} | ||
const isNumberWord = NUMBER_WORDS.includes(token.lowerCaseValue); | ||
if (isNumberWord) { | ||
if (!region) return START_NEW_REGION; | ||
if (canAddTokenToEndOfRegion(region, token)) { | ||
if (canAddTokenToEndOfRegion(region, token, options)) { | ||
return ADD; | ||
@@ -110,24 +173,2 @@ } | ||
const regionIsValid = region => { | ||
if (region.tokens.length === 1) { | ||
if (['a'].includes(region.tokens[0].lowerCaseValue)) return false; | ||
} | ||
return true; | ||
}; | ||
const getDecimalTokenIndex = (tokens) => tokens.reduce((acc, token, i) => | ||
DECIMALS.includes(token.lowerCaseValue) ? i : acc | ||
, -1); | ||
const getDecimalSubRegion = (tokens) => ({ | ||
start: tokens[0].start, | ||
end: tokens[tokens.length - 1].end, | ||
tokens: tokens.reduce((acc, token) => | ||
NUMBER_WORDS.concat(DECIMALS).includes(token.lowerCaseValue) ? | ||
acc.concat(token) : | ||
acc | ||
, []), | ||
type: TOKEN_TYPE.DECIMAL, | ||
}); | ||
const checkBlacklist = tokens => | ||
@@ -137,3 +178,3 @@ tokens.length === 1 && | ||
const matchRegions = (tokens) => { | ||
const matchRegions = (tokens, options) => { | ||
const regions = []; | ||
@@ -146,6 +187,5 @@ | ||
const tokensCount = tokens.length; | ||
const decimalIndex = getDecimalTokenIndex(tokens); | ||
while (i < (decimalIndex === -1 ? tokensCount : decimalIndex)) { | ||
while (i < tokensCount) { | ||
const token = tokens[i]; | ||
const tokenFits = checkIfTokenFitsRegion(currentRegion, token); | ||
const tokenFits = checkIfTokenFitsRegion(currentRegion, token, options); | ||
switch (tokenFits) { | ||
@@ -156,4 +196,9 @@ case SKIP: { | ||
case ADD: { | ||
currentRegion.end = token.end; | ||
currentRegion.tokens.push(token); | ||
if (currentRegion) { | ||
currentRegion.end = token.end; | ||
currentRegion.tokens.push(token); | ||
if (token.type === TOKEN_TYPE.DECIMAL) { | ||
currentRegion.hasDecimal = true; | ||
} | ||
} | ||
break; | ||
@@ -168,2 +213,5 @@ } | ||
regions.push(currentRegion); | ||
if (token.type === TOKEN_TYPE.DECIMAL) { | ||
currentRegion.hasDecimal = true; | ||
} | ||
break; | ||
@@ -179,20 +227,4 @@ } | ||
} | ||
return regions.reduce((acc, region) => { | ||
const decimalRegion = decimalIndex !== -1 ? | ||
getDecimalSubRegion(tokens.slice(decimalIndex)) : | ||
{ tokens: [] }; | ||
let subRegions = getSubRegions(region); | ||
if (decimalRegion.tokens.length) { | ||
subRegions = subRegions.concat(decimalRegion); | ||
} | ||
const regionWithDecimal = { | ||
...region, | ||
subRegions, | ||
}; | ||
regionWithDecimal.tokens = [ ...regionWithDecimal.tokens, ...decimalRegion.tokens ]; | ||
if (regionWithDecimal.tokens.length) { | ||
regionWithDecimal.end = regionWithDecimal.tokens[regionWithDecimal.tokens.length - 1].end; | ||
} | ||
return regionIsValid(decimalRegion) ? acc.concat(regionWithDecimal) : acc; | ||
}, []); | ||
return regions.map(region => ({ ...region, subRegions: getSubRegions(region, options) })); | ||
}; | ||
@@ -217,10 +249,10 @@ | ||
return end !== start ? | ||
acc.concat({ | ||
start, | ||
end: end - 1, | ||
value: unfuzzyChunk, | ||
lowerCaseValue: unfuzzyChunk.toLowerCase(), | ||
type: getTokenType(unfuzzyChunk, options), | ||
}) : | ||
acc; | ||
acc.concat({ | ||
start, | ||
end: end - 1, | ||
value: unfuzzyChunk, | ||
lowerCaseValue: unfuzzyChunk.toLowerCase(), | ||
type: getTokenType(unfuzzyChunk, options), | ||
}) : | ||
acc; | ||
}, []); | ||
@@ -227,0 +259,0 @@ const regions = matchRegions(tokens, options); |
61228
1620