parse-latin
Advanced tools
Comparing version 4.1.0 to 4.1.1
@@ -1,2 +0,2 @@ | ||
'use strict'; | ||
module.exports = require('./lib/index.js'); | ||
'use strict' | ||
module.exports = require('./lib') |
/* This module is generated by `script/build-expressions.js` */ | ||
'use strict'; | ||
'use strict' | ||
@@ -11,3 +11,3 @@ module.exports = { | ||
numerical: /^(?:[0-9\xB2\xB3\xB9\xBC-\xBE\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u09F4-\u09F9\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0B72-\u0B77\u0BE6-\u0BF2\u0C66-\u0C6F\u0C78-\u0C7E\u0CE6-\u0CEF\u0D66-\u0D75\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F33\u1040-\u1049\u1090-\u1099\u1369-\u137C\u16EE-\u16F0\u17E0-\u17E9\u17F0-\u17F9\u1810-\u1819\u1946-\u194F\u19D0-\u19DA\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\u2070\u2074-\u2079\u2080-\u2089\u2150-\u2182\u2185-\u2189\u2460-\u249B\u24EA-\u24FF\u2776-\u2793\u2CFD\u3007\u3021-\u3029\u3038-\u303A\u3192-\u3195\u3220-\u3229\u3248-\u324F\u3251-\u325F\u3280-\u3289\u32B1-\u32BF\uA620-\uA629\uA6E6-\uA6EF\uA830-\uA835\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19]|\uD800[\uDD07-\uDD33\uDD40-\uDD78\uDD8A\uDD8B\uDEE1-\uDEFB\uDF20-\uDF23\uDF41\uDF4A\uDFD1-\uDFD5]|\uD801[\uDCA0-\uDCA9]|\uD802[\uDC58-\uDC5F\uDC79-\uDC7F\uDCA7-\uDCAF\uDCFB-\uDCFF\uDD16-\uDD1B\uDDBC\uDDBD\uDDC0-\uDDCF\uDDD2-\uDDFF\uDE40-\uDE47\uDE7D\uDE7E\uDE9D-\uDE9F\uDEEB-\uDEEF\uDF58-\uDF5F\uDF78-\uDF7F\uDFA9-\uDFAF]|\uD803[\uDCFA-\uDCFF\uDE60-\uDE7E]|\uD804[\uDC52-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9\uDDE1-\uDDF4\uDEF0-\uDEF9]|\uD805[\uDCD0-\uDCD9\uDE50-\uDE59\uDEC0-\uDEC9\uDF30-\uDF3B]|\uD806[\uDCE0-\uDCF2]|\uD809[\uDC00-\uDC6E]|\uD81A[\uDE60-\uDE69\uDF50-\uDF59\uDF5B-\uDF61]|\uD834[\uDF60-\uDF71]|\uD835[\uDFCE-\uDFFF]|\uD83A[\uDCC7-\uDCCF]|\uD83C[\uDD00-\uDD0C])+$/, | ||
digitStart: /^[0-9]/, | ||
digitStart: /^\d/, | ||
lowerInitial: /^(?:[a-z\xB5\xDF-\xF6\xF8-\xFF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\
u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6\u1FC7\u1FD0-\u1FD3\u1FD6\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6\u1FF7\u210A\u210E\u210F\u2113\u212F\u2134\u2139\u213C\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7B5\uA7B7\uA7FA\uAB30-\uAB5A\uAB60-\
uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A]|\uD801[\uDC28-\uDC4F]|\uD803[\uDCC0-\uDCF2]|\uD806[\uDCC0-\uDCDF]|\uD835[\uDC1A-\uDC33\uDC4E-\uDC54\uDC56-\uDC67\uDC82-\uDC9B\uDCB6-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDCCF\uDCEA-\uDD03\uDD1E-\uDD37\uDD52-\uDD6B\uDD86-\uDD9F\uDDBA-\uDDD3\uDDEE-\uDE07\uDE22-\uDE3B\uDE56-\uDE6F\uDE8A-\uDEA5\uDEC2-\uDEDA\uDEDC-\uDEE1\uDEFC-\uDF14\uDF16-\uDF1B\uDF36-\uDF4E\uDF50-\uDF55\uDF70-\uDF88\uDF8A-\uDF8F\uDFAA-\uDFC2\uDFC4-\uDFC9\uDFCB])/, | ||
@@ -18,2 +18,2 @@ surrogates: /[\uD800-\uDFFF]/, | ||
whiteSpace: /[\t-\r \x85\xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]/ | ||
}; | ||
} |
330
lib/index.js
@@ -1,7 +0,7 @@ | ||
'use strict'; | ||
'use strict' | ||
var createParser = require('./parser'); | ||
var expressions = require('./expressions'); | ||
var createParser = require('./parser') | ||
var expressions = require('./expressions') | ||
module.exports = ParseLatin; | ||
module.exports = ParseLatin | ||
@@ -13,38 +13,38 @@ /* == PARSE LATIN ================================================== */ | ||
function ParseLatin(doc, file) { | ||
var value = file || doc; | ||
var value = file || doc | ||
if (!(this instanceof ParseLatin)) { | ||
return new ParseLatin(doc, file); | ||
return new ParseLatin(doc, file) | ||
} | ||
this.doc = value ? String(value) : null; | ||
this.doc = value ? String(value) : null | ||
} | ||
/* Quick access to the prototype. */ | ||
var proto = ParseLatin.prototype; | ||
var proto = ParseLatin.prototype | ||
/* Default position. */ | ||
proto.position = true; | ||
proto.position = true | ||
/* Create text nodes. */ | ||
proto.tokenizeSymbol = createTextFactory('Symbol'); | ||
proto.tokenizeWhiteSpace = createTextFactory('WhiteSpace'); | ||
proto.tokenizePunctuation = createTextFactory('Punctuation'); | ||
proto.tokenizeSource = createTextFactory('Source'); | ||
proto.tokenizeText = createTextFactory('Text'); | ||
proto.tokenizeSymbol = createTextFactory('Symbol') | ||
proto.tokenizeWhiteSpace = createTextFactory('WhiteSpace') | ||
proto.tokenizePunctuation = createTextFactory('Punctuation') | ||
proto.tokenizeSource = createTextFactory('Source') | ||
proto.tokenizeText = createTextFactory('Text') | ||
/* Expose `run`. */ | ||
proto.run = run; | ||
proto.run = run | ||
/* Inject `plugins` to modify the result of the method | ||
* at `key` on the operated on context. */ | ||
proto.use = useFactory(function (context, key, plugins) { | ||
context[key] = context[key].concat(plugins); | ||
}); | ||
proto.use = useFactory(function(context, key, plugins) { | ||
context[key] = context[key].concat(plugins) | ||
}) | ||
/* Inject `plugins` to modify the result of the method | ||
* at `key` on the operated on context, before any other. */ | ||
proto.useFirst = useFactory(function (context, key, plugins) { | ||
context[key] = plugins.concat(context[key]); | ||
}); | ||
proto.useFirst = useFactory(function(context, key, plugins) { | ||
context[key] = plugins.concat(context[key]) | ||
}) | ||
@@ -55,10 +55,10 @@ /* Easy access to the document parser. This additionally | ||
* construction. */ | ||
proto.parse = function (value) { | ||
return this.tokenizeRoot(value || this.doc); | ||
}; | ||
proto.parse = function(value) { | ||
return this.tokenizeRoot(value || this.doc) | ||
} | ||
/* Transform a `value` into a list of `NLCSTNode`s. */ | ||
proto.tokenize = function (value) { | ||
return tokenize(this, value); | ||
}; | ||
proto.tokenize = function(value) { | ||
return tokenize(this, value) | ||
} | ||
@@ -80,10 +80,10 @@ /* == PARENT NODES ================================================= | ||
* `TextNode`, its value set to the given `value`. */ | ||
pluggable(ParseLatin, 'tokenizeWord', function (value, eat) { | ||
var add = (eat || noopEat)(''); | ||
var parent = {type: 'WordNode', children: []}; | ||
pluggable(ParseLatin, 'tokenizeWord', function(value, eat) { | ||
var add = (eat || noopEat)('') | ||
var parent = {type: 'WordNode', children: []} | ||
this.tokenizeText(value, eat, parent); | ||
this.tokenizeText(value, eat, parent) | ||
return add(parent); | ||
}); | ||
return add(parent) | ||
}) | ||
@@ -97,6 +97,10 @@ /* Create a `SentenceNode` with its children set to | ||
* `PunctuationNode`s, and `WhiteSpaceNode`s. */ | ||
pluggable(ParseLatin, 'tokenizeSentence', createParser({ | ||
type: 'SentenceNode', | ||
tokenizer: 'tokenize' | ||
})); | ||
pluggable( | ||
ParseLatin, | ||
'tokenizeSentence', | ||
createParser({ | ||
type: 'SentenceNode', | ||
tokenizer: 'tokenize' | ||
}) | ||
) | ||
@@ -109,17 +113,25 @@ /* Create a `ParagraphNode` with its children set to | ||
* populated by `SentenceNode`s and `WhiteSpaceNode`s. */ | ||
pluggable(ParseLatin, 'tokenizeParagraph', createParser({ | ||
type: 'ParagraphNode', | ||
delimiter: expressions.terminalMarker, | ||
delimiterType: 'PunctuationNode', | ||
tokenizer: 'tokenizeSentence' | ||
})); | ||
pluggable( | ||
ParseLatin, | ||
'tokenizeParagraph', | ||
createParser({ | ||
type: 'ParagraphNode', | ||
delimiter: expressions.terminalMarker, | ||
delimiterType: 'PunctuationNode', | ||
tokenizer: 'tokenizeSentence' | ||
}) | ||
) | ||
/* Create a `RootNode` with its children set to `Node`s, | ||
* their values set to the tokenized given `value`. */ | ||
pluggable(ParseLatin, 'tokenizeRoot', createParser({ | ||
type: 'RootNode', | ||
delimiter: expressions.newLine, | ||
delimiterType: 'WhiteSpaceNode', | ||
tokenizer: 'tokenizeParagraph' | ||
})); | ||
pluggable( | ||
ParseLatin, | ||
'tokenizeRoot', | ||
createParser({ | ||
type: 'RootNode', | ||
delimiter: expressions.newLine, | ||
delimiterType: 'WhiteSpaceNode', | ||
tokenizer: 'tokenizeParagraph' | ||
}) | ||
) | ||
@@ -136,3 +148,3 @@ /* == PLUGINS ====================================================== */ | ||
require('./plugin/patch-position') | ||
]); | ||
]) | ||
@@ -152,3 +164,3 @@ proto.use('tokenizeParagraph', [ | ||
require('./plugin/patch-position') | ||
]); | ||
]) | ||
@@ -160,3 +172,3 @@ proto.use('tokenizeRoot', [ | ||
require('./plugin/patch-position') | ||
]); | ||
]) | ||
@@ -167,5 +179,5 @@ /* == TEXT NODES =================================================== */ | ||
function createTextFactory(type) { | ||
type += 'Node'; | ||
type += 'Node' | ||
return createText; | ||
return createText | ||
@@ -175,9 +187,12 @@ /* Construct a `Text` from a bound `type` */ | ||
if (value === null || value === undefined) { | ||
value = ''; | ||
value = '' | ||
} | ||
return (eat || noopEat)(value)({ | ||
type: type, | ||
value: String(value) | ||
}, parent); | ||
return (eat || noopEat)(value)( | ||
{ | ||
type: type, | ||
value: String(value) | ||
}, | ||
parent | ||
) | ||
} | ||
@@ -188,13 +203,13 @@ } | ||
function run(key, nodes) { | ||
var wareKey = key + 'Plugins'; | ||
var plugins = this[wareKey]; | ||
var index = -1; | ||
var wareKey = key + 'Plugins' | ||
var plugins = this[wareKey] | ||
var index = -1 | ||
if (plugins) { | ||
while (plugins[++index]) { | ||
plugins[index](nodes); | ||
plugins[index](nodes) | ||
} | ||
} | ||
return nodes; | ||
return nodes | ||
} | ||
@@ -206,5 +221,5 @@ | ||
* on `Constructor`. */ | ||
Constructor.prototype[key] = function () { | ||
return this.run(key, callback.apply(this, arguments)); | ||
}; | ||
Constructor.prototype[key] = function() { | ||
return this.run(key, callback.apply(this, arguments)) | ||
} | ||
} | ||
@@ -215,3 +230,3 @@ | ||
function useFactory(callback) { | ||
return use; | ||
return use | ||
@@ -221,4 +236,4 @@ /* Validate if `plugins` can be inserted. Invokes | ||
function use(key, plugins) { | ||
var self = this; | ||
var wareKey; | ||
var self = this | ||
var wareKey | ||
@@ -229,5 +244,5 @@ /* Throw if the method is not pluggable. */ | ||
'Illegal Invocation: Unsupported `key` for ' + | ||
'`use(key, plugins)`. Make sure `key` is a ' + | ||
'supported function' | ||
); | ||
'`use(key, plugins)`. Make sure `key` is a ' + | ||
'supported function' | ||
) | ||
} | ||
@@ -237,12 +252,12 @@ | ||
if (!plugins) { | ||
return; | ||
return | ||
} | ||
wareKey = key + 'Plugins'; | ||
wareKey = key + 'Plugins' | ||
/* Make sure `plugins` is a list. */ | ||
if (typeof plugins === 'function') { | ||
plugins = [plugins]; | ||
plugins = [plugins] | ||
} else { | ||
plugins = plugins.concat(); | ||
plugins = plugins.concat() | ||
} | ||
@@ -252,7 +267,7 @@ | ||
if (!self[wareKey]) { | ||
self[wareKey] = []; | ||
self[wareKey] = [] | ||
} | ||
/* Invoke callback with the ware key and plugins. */ | ||
callback(self, wareKey, plugins); | ||
callback(self, wareKey, plugins) | ||
} | ||
@@ -264,32 +279,32 @@ } | ||
/* Match a word character. */ | ||
var WORD = expressions.word; | ||
var WORD = expressions.word | ||
/* Match a surrogate character. */ | ||
var SURROGATES = expressions.surrogates; | ||
var SURROGATES = expressions.surrogates | ||
/* Match a punctuation character. */ | ||
var PUNCTUATION = expressions.punctuation; | ||
var PUNCTUATION = expressions.punctuation | ||
/* Match a white space character. */ | ||
var WHITE_SPACE = expressions.whiteSpace; | ||
var WHITE_SPACE = expressions.whiteSpace | ||
/* Transform a `value` into a list of `NLCSTNode`s. */ | ||
function tokenize(parser, value) { | ||
var tokens; | ||
var offset; | ||
var line; | ||
var column; | ||
var index; | ||
var length; | ||
var character; | ||
var queue; | ||
var prev; | ||
var left; | ||
var right; | ||
var eater; | ||
var tokens | ||
var offset | ||
var line | ||
var column | ||
var index | ||
var length | ||
var character | ||
var queue | ||
var prev | ||
var left | ||
var right | ||
var eater | ||
if (value === null || value === undefined) { | ||
value = ''; | ||
value = '' | ||
} else if (value instanceof String) { | ||
value = value.toString(); | ||
value = value.toString() | ||
} | ||
@@ -302,55 +317,56 @@ | ||
if ('length' in value && (!value[0] || value[0].type)) { | ||
return value; | ||
return value | ||
} | ||
throw new Error( | ||
'Illegal invocation: \'' + value + '\'' + | ||
' is not a valid argument for \'ParseLatin\'' | ||
); | ||
"Illegal invocation: '" + | ||
value + | ||
"' is not a valid argument for 'ParseLatin'" | ||
) | ||
} | ||
tokens = []; | ||
tokens = [] | ||
if (!value) { | ||
return tokens; | ||
return tokens | ||
} | ||
index = 0; | ||
offset = 0; | ||
line = 1; | ||
column = 1; | ||
index = 0 | ||
offset = 0 | ||
line = 1 | ||
column = 1 | ||
/* Eat mechanism to use. */ | ||
eater = parser.position ? eat : noPositionEat; | ||
eater = parser.position ? eat : noPositionEat | ||
length = value.length; | ||
prev = ''; | ||
queue = ''; | ||
length = value.length | ||
prev = '' | ||
queue = '' | ||
while (index < length) { | ||
character = value.charAt(index); | ||
character = value.charAt(index) | ||
if (WHITE_SPACE.test(character)) { | ||
right = 'WhiteSpace'; | ||
right = 'WhiteSpace' | ||
} else if (PUNCTUATION.test(character)) { | ||
right = 'Punctuation'; | ||
right = 'Punctuation' | ||
} else if (WORD.test(character)) { | ||
right = 'Word'; | ||
right = 'Word' | ||
} else { | ||
right = 'Symbol'; | ||
right = 'Symbol' | ||
} | ||
tick(); | ||
tick() | ||
prev = character; | ||
character = ''; | ||
left = right; | ||
right = null; | ||
prev = character | ||
character = '' | ||
left = right | ||
right = null | ||
index++; | ||
index++ | ||
} | ||
tick(); | ||
tick() | ||
return tokens; | ||
return tokens | ||
@@ -361,17 +377,15 @@ /* Check one character. */ | ||
left === right && | ||
( | ||
left === 'Word' || | ||
(left === 'Word' || | ||
left === 'WhiteSpace' || | ||
character === prev || | ||
SURROGATES.test(character) | ||
) | ||
SURROGATES.test(character)) | ||
) { | ||
queue += character; | ||
queue += character | ||
} else { | ||
/* Flush the previous queue. */ | ||
if (queue) { | ||
parser['tokenize' + left](queue, eater); | ||
parser['tokenize' + left](queue, eater) | ||
} | ||
queue = character; | ||
queue = character | ||
} | ||
@@ -384,7 +398,7 @@ } | ||
function eat(subvalue) { | ||
var pos = position(); | ||
var pos = position() | ||
update(subvalue); | ||
update(subvalue) | ||
return apply; | ||
return apply | ||
@@ -394,3 +408,3 @@ /* Add the given arguments, add `position` to | ||
function apply() { | ||
return pos(add.apply(null, arguments)); | ||
return pos(add.apply(null, arguments)) | ||
} | ||
@@ -402,7 +416,7 @@ } | ||
function noPositionEat() { | ||
return apply; | ||
return apply | ||
/* Add the given arguments and return the node. */ | ||
function apply() { | ||
return add.apply(null, arguments); | ||
return add.apply(null, arguments) | ||
} | ||
@@ -414,8 +428,8 @@ } | ||
if (parent) { | ||
parent.children.push(node); | ||
parent.children.push(node) | ||
} else { | ||
tokens.push(node); | ||
tokens.push(node) | ||
} | ||
return node; | ||
return node | ||
} | ||
@@ -425,12 +439,12 @@ | ||
function position() { | ||
var before = now(); | ||
var before = now() | ||
/* Add the position to a node. */ | ||
function patch(node) { | ||
node.position = new Position(before); | ||
node.position = new Position(before) | ||
return node; | ||
return node | ||
} | ||
return patch; | ||
return patch | ||
} | ||
@@ -440,12 +454,12 @@ | ||
function update(subvalue) { | ||
var subvalueLength = subvalue.length; | ||
var character = -1; | ||
var lastIndex = -1; | ||
var subvalueLength = subvalue.length | ||
var character = -1 | ||
var lastIndex = -1 | ||
offset += subvalueLength; | ||
offset += subvalueLength | ||
while (++character < subvalueLength) { | ||
if (subvalue.charAt(character) === '\n') { | ||
lastIndex = character; | ||
line++; | ||
lastIndex = character | ||
line++ | ||
} | ||
@@ -455,5 +469,5 @@ } | ||
if (lastIndex === -1) { | ||
column += subvalueLength; | ||
column += subvalueLength | ||
} else { | ||
column = subvalueLength - lastIndex; | ||
column = subvalueLength - lastIndex | ||
} | ||
@@ -464,4 +478,4 @@ } | ||
function Position(start) { | ||
this.start = start; | ||
this.end = now(); | ||
this.start = start | ||
this.end = now() | ||
} | ||
@@ -475,3 +489,3 @@ | ||
offset: offset | ||
}; | ||
} | ||
} | ||
@@ -484,6 +498,6 @@ } | ||
if (parent) { | ||
parent.children.push(node); | ||
parent.children.push(node) | ||
} | ||
return node; | ||
return node | ||
} | ||
@@ -495,3 +509,3 @@ | ||
function noopEat() { | ||
return noopAdd; | ||
return noopAdd | ||
} |
@@ -1,18 +0,18 @@ | ||
'use strict'; | ||
'use strict' | ||
var tokenizer = require('./tokenizer'); | ||
var tokenizer = require('./tokenizer') | ||
module.exports = parserFactory; | ||
module.exports = parserFactory | ||
/* Construct a parser based on `options`. */ | ||
function parserFactory(options) { | ||
var type = options.type; | ||
var tokenizerProperty = options.tokenizer; | ||
var delimiter = options.delimiter; | ||
var tokenize = delimiter && tokenizer(options.delimiterType, delimiter); | ||
var type = options.type | ||
var tokenizerProperty = options.tokenizer | ||
var delimiter = options.delimiter | ||
var tokenize = delimiter && tokenizer(options.delimiterType, delimiter) | ||
return parser; | ||
return parser | ||
function parser(value) { | ||
var children = this[tokenizerProperty](value); | ||
var children = this[tokenizerProperty](value) | ||
@@ -22,4 +22,4 @@ return { | ||
children: tokenize ? tokenize(children) : children | ||
}; | ||
} | ||
} | ||
} |
@@ -1,11 +0,11 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
var expressions = require('../expressions') | ||
module.exports = modifyChildren(breakImplicitSentences); | ||
module.exports = modifyChildren(breakImplicitSentences) | ||
/* Two or more new line characters. */ | ||
var MULTI_NEW_LINE = expressions.newLineMulti; | ||
var MULTI_NEW_LINE = expressions.newLineMulti | ||
@@ -15,23 +15,23 @@ /* Break a sentence if a white space with more | ||
function breakImplicitSentences(child, index, parent) { | ||
var children; | ||
var position; | ||
var length; | ||
var tail; | ||
var head; | ||
var end; | ||
var insertion; | ||
var node; | ||
var children | ||
var position | ||
var length | ||
var tail | ||
var head | ||
var end | ||
var insertion | ||
var node | ||
if (child.type !== 'SentenceNode') { | ||
return; | ||
return | ||
} | ||
children = child.children; | ||
children = child.children | ||
/* Ignore first and last child. */ | ||
length = children.length - 1; | ||
position = 0; | ||
length = children.length - 1 | ||
position = 0 | ||
while (++position < length) { | ||
node = children[position]; | ||
node = children[position] | ||
@@ -42,6 +42,6 @@ if ( | ||
) { | ||
continue; | ||
continue | ||
} | ||
child.children = children.slice(0, position); | ||
child.children = children.slice(0, position) | ||
@@ -51,13 +51,13 @@ insertion = { | ||
children: children.slice(position + 1) | ||
}; | ||
} | ||
tail = children[position - 1]; | ||
head = children[position + 1]; | ||
tail = children[position - 1] | ||
head = children[position + 1] | ||
parent.children.splice(index + 1, 0, node, insertion); | ||
parent.children.splice(index + 1, 0, node, insertion) | ||
if (child.position && tail.position && head.position) { | ||
end = child.position.end; | ||
end = child.position.end | ||
child.position.end = tail.position.end; | ||
child.position.end = tail.position.end | ||
@@ -67,7 +67,7 @@ insertion.position = { | ||
end: end | ||
}; | ||
} | ||
} | ||
return index + 1; | ||
return index + 1 | ||
} | ||
} |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(makeFinalWhiteSpaceSiblings); | ||
module.exports = modifyChildren(makeFinalWhiteSpaceSiblings) | ||
@@ -10,4 +10,4 @@ /* Move white space ending a paragraph up, so they are | ||
function makeFinalWhiteSpaceSiblings(child, index, parent) { | ||
var children = child.children; | ||
var prev; | ||
var children = child.children | ||
var prev | ||
@@ -19,12 +19,12 @@ if ( | ||
) { | ||
parent.children.splice(index + 1, 0, child.children.pop()); | ||
prev = children[children.length - 1]; | ||
parent.children.splice(index + 1, 0, child.children.pop()) | ||
prev = children[children.length - 1] | ||
if (prev && prev.position && child.position) { | ||
child.position.end = prev.position.end; | ||
child.position.end = prev.position.end | ||
} | ||
/* Next, iterate over the current node again. */ | ||
return index; | ||
return index | ||
} | ||
} |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var visitChildren = require('unist-util-visit-children'); | ||
var visitChildren = require('unist-util-visit-children') | ||
module.exports = visitChildren(makeInitialWhiteSpaceSiblings); | ||
module.exports = visitChildren(makeInitialWhiteSpaceSiblings) | ||
@@ -10,13 +10,17 @@ /* Move white space starting a sentence up, so they are | ||
function makeInitialWhiteSpaceSiblings(child, index, parent) { | ||
var children = child.children; | ||
var next; | ||
var children = child.children | ||
var next | ||
if (children && children.length !== 0 && children[0].type === 'WhiteSpaceNode') { | ||
parent.children.splice(index, 0, children.shift()); | ||
next = children[0]; | ||
if ( | ||
children && | ||
children.length !== 0 && | ||
children[0].type === 'WhiteSpaceNode' | ||
) { | ||
parent.children.splice(index, 0, children.shift()) | ||
next = children[0] | ||
if (next && next.position && child.position) { | ||
child.position.start = next.position.start; | ||
child.position.start = next.position.start | ||
} | ||
} | ||
} |
@@ -1,7 +0,7 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergeAffixExceptions); | ||
module.exports = modifyChildren(mergeAffixExceptions) | ||
@@ -11,44 +11,44 @@ /* Merge a sentence into its previous sentence, when | ||
function mergeAffixExceptions(child, index, parent) { | ||
var children = child.children; | ||
var node; | ||
var position; | ||
var value; | ||
var previousChild; | ||
var children = child.children | ||
var node | ||
var position | ||
var value | ||
var previousChild | ||
if (!children || children.length === 0 || index === 0) { | ||
return; | ||
return | ||
} | ||
position = -1; | ||
position = -1 | ||
while (children[++position]) { | ||
node = children[position]; | ||
node = children[position] | ||
if (node.type === 'WordNode') { | ||
return; | ||
return | ||
} | ||
if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') { | ||
value = toString(node); | ||
value = toString(node) | ||
if (value !== ',' && value !== ';') { | ||
return; | ||
return | ||
} | ||
previousChild = parent.children[index - 1]; | ||
previousChild = parent.children[index - 1] | ||
previousChild.children = previousChild.children.concat(children); | ||
previousChild.children = previousChild.children.concat(children) | ||
/* Update position. */ | ||
if (previousChild.position && child.position) { | ||
previousChild.position.end = child.position.end; | ||
previousChild.position.end = child.position.end | ||
} | ||
parent.children.splice(index, 1); | ||
parent.children.splice(index, 1) | ||
/* Next, iterate over the node *now* at the current | ||
* position. */ | ||
return index; | ||
return index | ||
} | ||
} | ||
} |
@@ -1,8 +0,8 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
var expressions = require('../expressions') | ||
module.exports = modifyChildren(mergeAffixSymbol); | ||
module.exports = modifyChildren(mergeAffixSymbol) | ||
@@ -13,3 +13,3 @@ /* Closing or final punctuation, or terminal markers | ||
* terminal marker. */ | ||
var AFFIX_SYMBOL = expressions.affixSymbol; | ||
var AFFIX_SYMBOL = expressions.affixSymbol | ||
@@ -20,11 +20,11 @@ /* Move certain punctuation following a terminal | ||
function mergeAffixSymbol(child, index, parent) { | ||
var children = child.children; | ||
var first; | ||
var second; | ||
var prev; | ||
var children = child.children | ||
var first | ||
var second | ||
var prev | ||
if (children && children.length !== 0 && index !== 0) { | ||
first = children[0]; | ||
second = children[1]; | ||
prev = parent.children[index - 1]; | ||
first = children[0] | ||
second = children[1] | ||
prev = parent.children[index - 1] | ||
@@ -35,17 +35,17 @@ if ( | ||
) { | ||
prev.children.push(children.shift()); | ||
prev.children.push(children.shift()) | ||
/* Update position. */ | ||
if (first.position && prev.position) { | ||
prev.position.end = first.position.end; | ||
prev.position.end = first.position.end | ||
} | ||
if (second && second.position && child.position) { | ||
child.position.start = second.position.start; | ||
child.position.start = second.position.start | ||
} | ||
/* Next, iterate over the previous node again. */ | ||
return index - 1; | ||
return index - 1 | ||
} | ||
} | ||
} |
@@ -1,7 +0,7 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergeFinalWordSymbol); | ||
module.exports = modifyChildren(mergeFinalWordSymbol) | ||
@@ -11,5 +11,5 @@ /* Merge certain punctuation marks into their | ||
function mergeFinalWordSymbol(child, index, parent) { | ||
var children; | ||
var prev; | ||
var next; | ||
var children | ||
var prev | ||
var next | ||
@@ -21,6 +21,6 @@ if ( | ||
) { | ||
children = parent.children; | ||
children = parent.children | ||
prev = children[index - 1]; | ||
next = children[index + 1]; | ||
prev = children[index - 1] | ||
next = children[index + 1] | ||
@@ -32,11 +32,11 @@ if ( | ||
/* Remove `child` from parent. */ | ||
children.splice(index, 1); | ||
children.splice(index, 1) | ||
/* Add the punctuation mark at the end of the | ||
* previous node. */ | ||
prev.children.push(child); | ||
prev.children.push(child) | ||
/* Update position. */ | ||
if (prev.position && child.position) { | ||
prev.position.end = child.position.end; | ||
prev.position.end = child.position.end | ||
} | ||
@@ -46,5 +46,5 @@ | ||
* current position (which was the next node). */ | ||
return index; | ||
return index | ||
} | ||
} | ||
} |
@@ -1,11 +0,11 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
var expressions = require('../expressions') | ||
module.exports = modifyChildren(mergeInitialDigitSentences); | ||
module.exports = modifyChildren(mergeInitialDigitSentences) | ||
/* Initial digit. */ | ||
var DIGIT = expressions.digitStart; | ||
var DIGIT = expressions.digitStart | ||
@@ -15,14 +15,14 @@ /* Merge a sentence into its previous sentence, when | ||
function mergeInitialDigitSentences(child, index, parent) { | ||
var children = child.children; | ||
var siblings = parent.children; | ||
var prev = siblings[index - 1]; | ||
var head = children[0]; | ||
var children = child.children | ||
var siblings = parent.children | ||
var prev = siblings[index - 1] | ||
var head = children[0] | ||
if (prev && head && head.type === 'WordNode' && DIGIT.test(toString(head))) { | ||
prev.children = prev.children.concat(children); | ||
siblings.splice(index, 1); | ||
prev.children = prev.children.concat(children) | ||
siblings.splice(index, 1) | ||
/* Update position. */ | ||
if (prev.position && child.position) { | ||
prev.position.end = child.position.end; | ||
prev.position.end = child.position.end | ||
} | ||
@@ -32,4 +32,4 @@ | ||
* the current position. */ | ||
return index; | ||
return index | ||
} | ||
} |
@@ -1,11 +0,11 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
var expressions = require('../expressions') | ||
module.exports = modifyChildren(mergeInitialLowerCaseLetterSentences); | ||
module.exports = modifyChildren(mergeInitialLowerCaseLetterSentences) | ||
/* Initial lowercase letter. */ | ||
var LOWER_INITIAL = expressions.lowerInitial; | ||
var LOWER_INITIAL = expressions.lowerInitial | ||
@@ -15,30 +15,30 @@ /* Merge a sentence into its previous sentence, when | ||
function mergeInitialLowerCaseLetterSentences(child, index, parent) { | ||
var children = child.children; | ||
var position; | ||
var node; | ||
var siblings; | ||
var prev; | ||
var children = child.children | ||
var position | ||
var node | ||
var siblings | ||
var prev | ||
if (children && children.length !== 0 && index !== 0) { | ||
position = -1; | ||
position = -1 | ||
while (children[++position]) { | ||
node = children[position]; | ||
node = children[position] | ||
if (node.type === 'WordNode') { | ||
if (!LOWER_INITIAL.test(toString(node))) { | ||
return; | ||
return | ||
} | ||
siblings = parent.children; | ||
siblings = parent.children | ||
prev = siblings[index - 1]; | ||
prev = siblings[index - 1] | ||
prev.children = prev.children.concat(children); | ||
prev.children = prev.children.concat(children) | ||
siblings.splice(index, 1); | ||
siblings.splice(index, 1) | ||
/* Update position. */ | ||
if (prev.position && child.position) { | ||
prev.position.end = child.position.end; | ||
prev.position.end = child.position.end | ||
} | ||
@@ -48,7 +48,7 @@ | ||
* the current position. */ | ||
return index; | ||
return index | ||
} | ||
if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') { | ||
return; | ||
return | ||
} | ||
@@ -55,0 +55,0 @@ } |
@@ -1,7 +0,7 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergeInitialWordSymbol); | ||
module.exports = modifyChildren(mergeInitialWordSymbol) | ||
@@ -11,4 +11,4 @@ /* Merge certain punctuation marks into their | ||
function mergeInitialWordSymbol(child, index, parent) { | ||
var children; | ||
var next; | ||
var children | ||
var next | ||
@@ -19,8 +19,8 @@ if ( | ||
) { | ||
return; | ||
return | ||
} | ||
children = parent.children; | ||
children = parent.children | ||
next = children[index + 1]; | ||
next = children[index + 1] | ||
@@ -33,15 +33,15 @@ /* If either a previous word, or no following word, | ||
) { | ||
return; | ||
return | ||
} | ||
/* Remove `child` from parent. */ | ||
children.splice(index, 1); | ||
children.splice(index, 1) | ||
/* Add the punctuation mark at the start of the | ||
* next node. */ | ||
next.children.unshift(child); | ||
next.children.unshift(child) | ||
/* Update position. */ | ||
if (next.position && child.position) { | ||
next.position.start = child.position.start; | ||
next.position.start = child.position.start | ||
} | ||
@@ -52,3 +52,3 @@ | ||
* word. */ | ||
return index - 1; | ||
return index - 1 | ||
} |
@@ -1,43 +0,39 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
var expressions = require('../expressions') | ||
module.exports = modifyChildren(mergeInitialisms); | ||
module.exports = modifyChildren(mergeInitialisms) | ||
var NUMERICAL = expressions.numerical; | ||
var NUMERICAL = expressions.numerical | ||
/* Merge initialisms. */ | ||
function mergeInitialisms(child, index, parent) { | ||
var siblings; | ||
var prev; | ||
var children; | ||
var length; | ||
var position; | ||
var otherChild; | ||
var isAllDigits; | ||
var value; | ||
var siblings | ||
var prev | ||
var children | ||
var length | ||
var position | ||
var otherChild | ||
var isAllDigits | ||
var value | ||
if (index !== 0 && toString(child) === '.') { | ||
siblings = parent.children; | ||
siblings = parent.children | ||
prev = siblings[index - 1]; | ||
children = prev.children; | ||
prev = siblings[index - 1] | ||
children = prev.children | ||
length = children && children.length; | ||
length = children && children.length | ||
if ( | ||
prev.type === 'WordNode' && | ||
length !== 1 && | ||
length % 2 !== 0 | ||
) { | ||
position = length; | ||
if (prev.type === 'WordNode' && length !== 1 && length % 2 !== 0) { | ||
position = length | ||
isAllDigits = true; | ||
isAllDigits = true | ||
while (children[--position]) { | ||
otherChild = children[position]; | ||
otherChild = children[position] | ||
value = toString(otherChild); | ||
value = toString(otherChild) | ||
@@ -48,13 +44,13 @@ if (position % 2 === 0) { | ||
if (value.length > 1) { | ||
return; | ||
return | ||
} | ||
if (!NUMERICAL.test(value)) { | ||
isAllDigits = false; | ||
isAllDigits = false | ||
} | ||
} else if (value !== '.') { | ||
if (position < length - 2) { | ||
break; | ||
break | ||
} else { | ||
return; | ||
return | ||
} | ||
@@ -66,10 +62,10 @@ } | ||
/* Remove `child` from parent. */ | ||
siblings.splice(index, 1); | ||
siblings.splice(index, 1) | ||
/* Add child to the previous children. */ | ||
children.push(child); | ||
children.push(child) | ||
/* Update position. */ | ||
if (prev.position && child.position) { | ||
prev.position.end = child.position.end; | ||
prev.position.end = child.position.end | ||
} | ||
@@ -79,3 +75,3 @@ | ||
* position. */ | ||
return index; | ||
return index | ||
} | ||
@@ -82,0 +78,0 @@ } |
@@ -1,23 +0,23 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergeInnerWordSlash); | ||
module.exports = modifyChildren(mergeInnerWordSlash) | ||
var C_SLASH = '/'; | ||
var C_SLASH = '/' | ||
/* Merge words joined by certain punctuation marks. */ | ||
function mergeInnerWordSlash(child, index, parent) { | ||
var siblings = parent.children; | ||
var prev; | ||
var next; | ||
var prevValue; | ||
var nextValue; | ||
var queue; | ||
var tail; | ||
var count; | ||
var siblings = parent.children | ||
var prev | ||
var next | ||
var prevValue | ||
var nextValue | ||
var queue | ||
var tail | ||
var count | ||
prev = siblings[index - 1]; | ||
next = siblings[index + 1]; | ||
prev = siblings[index - 1] | ||
next = siblings[index + 1] | ||
@@ -30,26 +30,23 @@ if ( | ||
) { | ||
prevValue = toString(prev); | ||
tail = child; | ||
queue = [child]; | ||
count = 1; | ||
prevValue = toString(prev) | ||
tail = child | ||
queue = [child] | ||
count = 1 | ||
if (next && next.type === 'WordNode') { | ||
nextValue = toString(next); | ||
tail = next; | ||
queue = queue.concat(next.children); | ||
count++; | ||
nextValue = toString(next) | ||
tail = next | ||
queue = queue.concat(next.children) | ||
count++ | ||
} | ||
if ( | ||
prevValue.length < 3 && | ||
(!nextValue || nextValue.length < 3) | ||
) { | ||
if (prevValue.length < 3 && (!nextValue || nextValue.length < 3)) { | ||
/* Add all found tokens to `prev`s children. */ | ||
prev.children = prev.children.concat(queue); | ||
prev.children = prev.children.concat(queue) | ||
siblings.splice(index, count); | ||
siblings.splice(index, count) | ||
/* Update position. */ | ||
if (prev.position && tail.position) { | ||
prev.position.end = tail.position.end; | ||
prev.position.end = tail.position.end | ||
} | ||
@@ -59,5 +56,5 @@ | ||
* position. */ | ||
return index; | ||
return index | ||
} | ||
} | ||
} |
@@ -1,31 +0,34 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
var expressions = require('../expressions') | ||
module.exports = modifyChildren(mergeInnerWordSymbol); | ||
module.exports = modifyChildren(mergeInnerWordSymbol) | ||
/* Symbols part of surrounding words. */ | ||
var INNER_WORD_SYMBOL = expressions.wordSymbolInner; | ||
var INNER_WORD_SYMBOL = expressions.wordSymbolInner | ||
/* Merge words joined by certain punctuation marks. */ | ||
function mergeInnerWordSymbol(child, index, parent) { | ||
var siblings; | ||
var sibling; | ||
var prev; | ||
var last; | ||
var position; | ||
var tokens; | ||
var queue; | ||
var siblings | ||
var sibling | ||
var prev | ||
var last | ||
var position | ||
var tokens | ||
var queue | ||
if (index !== 0 && (child.type === 'SymbolNode' || child.type === 'PunctuationNode')) { | ||
siblings = parent.children; | ||
prev = siblings[index - 1]; | ||
if ( | ||
index !== 0 && | ||
(child.type === 'SymbolNode' || child.type === 'PunctuationNode') | ||
) { | ||
siblings = parent.children | ||
prev = siblings[index - 1] | ||
if (prev && prev.type === 'WordNode') { | ||
position = index - 1; | ||
position = index - 1 | ||
tokens = []; | ||
queue = []; | ||
tokens = [] | ||
queue = [] | ||
@@ -40,18 +43,16 @@ /* - If a token which is neither word nor | ||
while (siblings[++position]) { | ||
sibling = siblings[position]; | ||
sibling = siblings[position] | ||
if (sibling.type === 'WordNode') { | ||
tokens = tokens.concat(queue, sibling.children); | ||
tokens = tokens.concat(queue, sibling.children) | ||
queue = []; | ||
queue = [] | ||
} else if ( | ||
( | ||
sibling.type === 'SymbolNode' || | ||
sibling.type === 'PunctuationNode' | ||
) && | ||
(sibling.type === 'SymbolNode' || | ||
sibling.type === 'PunctuationNode') && | ||
INNER_WORD_SYMBOL.test(toString(sibling)) | ||
) { | ||
queue.push(sibling); | ||
queue.push(sibling) | ||
} else { | ||
break; | ||
break | ||
} | ||
@@ -64,3 +65,3 @@ } | ||
if (queue.length !== 0) { | ||
position -= queue.length; | ||
position -= queue.length | ||
} | ||
@@ -70,12 +71,12 @@ | ||
* marks and children of words. */ | ||
siblings.splice(index, position - index); | ||
siblings.splice(index, position - index) | ||
/* Add all found tokens to `prev`s children. */ | ||
prev.children = prev.children.concat(tokens); | ||
prev.children = prev.children.concat(tokens) | ||
last = tokens[tokens.length - 1]; | ||
last = tokens[tokens.length - 1] | ||
/* Update position. */ | ||
if (prev.position && last.position) { | ||
prev.position.end = last.position.end; | ||
prev.position.end = last.position.end | ||
} | ||
@@ -85,3 +86,3 @@ | ||
* position. */ | ||
return index; | ||
return index | ||
} | ||
@@ -88,0 +89,0 @@ } |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergeNonWordSentences); | ||
module.exports = modifyChildren(mergeNonWordSentences) | ||
@@ -10,24 +10,24 @@ /* Merge a sentence into the following sentence, when | ||
function mergeNonWordSentences(child, index, parent) { | ||
var children = child.children; | ||
var position = -1; | ||
var prev; | ||
var next; | ||
var children = child.children | ||
var position = -1 | ||
var prev | ||
var next | ||
while (children[++position]) { | ||
if (children[position].type === 'WordNode') { | ||
return; | ||
return | ||
} | ||
} | ||
prev = parent.children[index - 1]; | ||
prev = parent.children[index - 1] | ||
if (prev) { | ||
prev.children = prev.children.concat(children); | ||
prev.children = prev.children.concat(children) | ||
/* Remove the child. */ | ||
parent.children.splice(index, 1); | ||
parent.children.splice(index, 1) | ||
/* Patch position. */ | ||
if (prev.position && child.position) { | ||
prev.position.end = child.position.end; | ||
prev.position.end = child.position.end | ||
} | ||
@@ -38,18 +38,18 @@ | ||
* next node). */ | ||
return index; | ||
return index | ||
} | ||
next = parent.children[index + 1]; | ||
next = parent.children[index + 1] | ||
if (next) { | ||
next.children = children.concat(next.children); | ||
next.children = children.concat(next.children) | ||
/* Patch position. */ | ||
if (next.position && child.position) { | ||
next.position.start = child.position.start; | ||
next.position.start = child.position.start | ||
} | ||
/* Remove the child. */ | ||
parent.children.splice(index, 1); | ||
parent.children.splice(index, 1) | ||
} | ||
} |
@@ -1,7 +0,7 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var toString = require('nlcst-to-string') | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergePrefixExceptions); | ||
module.exports = modifyChildren(mergePrefixExceptions) | ||
@@ -15,3 +15,2 @@ /* Blacklist of full stop characters that should not | ||
'[a-z]|' + | ||
/* Common Latin Abbreviations: | ||
@@ -28,4 +27,4 @@ * Based on: http://en.wikipedia.org/wiki/List_of_Latin_abbreviations | ||
'fl|ibid|id|nem|op|pro|seq|sic|stat|tem|viz' + | ||
')$' | ||
); | ||
')$' | ||
) | ||
@@ -35,12 +34,12 @@ /* Merge a sentence into its next sentence, when the | ||
function mergePrefixExceptions(child, index, parent) { | ||
var children = child.children; | ||
var period; | ||
var node; | ||
var next; | ||
var children = child.children | ||
var period | ||
var node | ||
var next | ||
if (children && children.length > 1) { | ||
period = children[children.length - 1]; | ||
period = children[children.length - 1] | ||
if (period && toString(period) === '.') { | ||
node = children[children.length - 2]; | ||
node = children[children.length - 2] | ||
@@ -53,25 +52,25 @@ if ( | ||
/* Merge period into abbreviation. */ | ||
node.children.push(period); | ||
children.pop(); | ||
node.children.push(period) | ||
children.pop() | ||
/* Update position. */ | ||
if (period.position && node.position) { | ||
node.position.end = period.position.end; | ||
node.position.end = period.position.end | ||
} | ||
/* Merge sentences. */ | ||
next = parent.children[index + 1]; | ||
next = parent.children[index + 1] | ||
if (next) { | ||
child.children = children.concat(next.children); | ||
child.children = children.concat(next.children) | ||
parent.children.splice(index + 1, 1); | ||
parent.children.splice(index + 1, 1) | ||
/* Update position. */ | ||
if (next.position && child.position) { | ||
child.position.end = next.position.end; | ||
child.position.end = next.position.end | ||
} | ||
/* Next, iterate over the current node again. */ | ||
return index - 1; | ||
return index - 1 | ||
} | ||
@@ -78,0 +77,0 @@ } |
@@ -1,8 +0,8 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var visitChildren = require('unist-util-visit-children'); | ||
var expressions = require('../expressions'); | ||
var toString = require('nlcst-to-string') | ||
var visitChildren = require('unist-util-visit-children') | ||
var expressions = require('../expressions') | ||
module.exports = visitChildren(mergeRemainingFullStops); | ||
module.exports = visitChildren(mergeRemainingFullStops) | ||
@@ -12,3 +12,3 @@ /* Blacklist of full stop characters that should not | ||
* case-insensitive abbreviation. */ | ||
var TERMINAL_MARKER = expressions.terminalMarker; | ||
var TERMINAL_MARKER = expressions.terminalMarker | ||
@@ -19,14 +19,17 @@ /* Merge non-terminal-marker full stops into | ||
function mergeRemainingFullStops(child) { | ||
var children = child.children; | ||
var position = children.length; | ||
var hasFoundDelimiter = false; | ||
var grandchild; | ||
var prev; | ||
var next; | ||
var nextNext; | ||
var children = child.children | ||
var position = children.length | ||
var hasFoundDelimiter = false | ||
var grandchild | ||
var prev | ||
var next | ||
var nextNext | ||
while (children[--position]) { | ||
grandchild = children[position]; | ||
grandchild = children[position] | ||
if (grandchild.type !== 'SymbolNode' && grandchild.type !== 'PunctuationNode') { | ||
if ( | ||
grandchild.type !== 'SymbolNode' && | ||
grandchild.type !== 'PunctuationNode' | ||
) { | ||
/* This is a sentence without terminal marker, | ||
@@ -36,6 +39,6 @@ * so we 'fool' the code to make it think we | ||
if (grandchild.type === 'WordNode') { | ||
hasFoundDelimiter = true; | ||
hasFoundDelimiter = true | ||
} | ||
continue; | ||
continue | ||
} | ||
@@ -45,3 +48,3 @@ | ||
if (!TERMINAL_MARKER.test(toString(grandchild))) { | ||
continue; | ||
continue | ||
} | ||
@@ -53,5 +56,5 @@ | ||
if (!hasFoundDelimiter) { | ||
hasFoundDelimiter = true; | ||
hasFoundDelimiter = true | ||
continue; | ||
continue | ||
} | ||
@@ -61,10 +64,10 @@ | ||
if (toString(grandchild) !== '.') { | ||
continue; | ||
continue | ||
} | ||
prev = children[position - 1]; | ||
next = children[position + 1]; | ||
prev = children[position - 1] | ||
next = children[position + 1] | ||
if (prev && prev.type === 'WordNode') { | ||
nextNext = children[position + 2]; | ||
nextNext = children[position + 2] | ||
@@ -80,28 +83,28 @@ /* Continue when the full stop is followed by | ||
) { | ||
continue; | ||
continue | ||
} | ||
/* Remove `child` from parent. */ | ||
children.splice(position, 1); | ||
children.splice(position, 1) | ||
/* Add the punctuation mark at the end of the | ||
* previous node. */ | ||
prev.children.push(grandchild); | ||
prev.children.push(grandchild) | ||
/* Update position. */ | ||
if (grandchild.position && prev.position) { | ||
prev.position.end = grandchild.position.end; | ||
prev.position.end = grandchild.position.end | ||
} | ||
position--; | ||
position-- | ||
} else if (next && next.type === 'WordNode') { | ||
/* Remove `child` from parent. */ | ||
children.splice(position, 1); | ||
children.splice(position, 1) | ||
/* Add the punctuation mark at the start of | ||
* the next node. */ | ||
next.children.unshift(grandchild); | ||
next.children.unshift(grandchild) | ||
if (grandchild.position && next.position) { | ||
next.position.start = grandchild.position.start; | ||
next.position.start = grandchild.position.start | ||
} | ||
@@ -108,0 +111,0 @@ } |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(mergeFinalWordSymbol); | ||
module.exports = modifyChildren(mergeFinalWordSymbol) | ||
@@ -12,25 +12,25 @@ /* Merge multiple words. This merges the children of | ||
function mergeFinalWordSymbol(child, index, parent) { | ||
var siblings = parent.children; | ||
var next; | ||
var siblings = parent.children | ||
var next | ||
if (child.type === 'WordNode') { | ||
next = siblings[index + 1]; | ||
next = siblings[index + 1] | ||
if (next && next.type === 'WordNode') { | ||
/* Remove `next` from parent. */ | ||
siblings.splice(index + 1, 1); | ||
siblings.splice(index + 1, 1) | ||
/* Add the punctuation mark at the end of the | ||
* previous node. */ | ||
child.children = child.children.concat(next.children); | ||
child.children = child.children.concat(next.children) | ||
/* Update position. */ | ||
if (next.position && child.position) { | ||
child.position.end = next.position.end; | ||
child.position.end = next.position.end | ||
} | ||
/* Next, re-iterate the current node. */ | ||
return index; | ||
return index | ||
} | ||
} | ||
} |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var visitChildren = require('unist-util-visit-children'); | ||
var visitChildren = require('unist-util-visit-children') | ||
module.exports = visitChildren(patchPosition); | ||
module.exports = visitChildren(patchPosition) | ||
@@ -10,20 +10,20 @@ /* Patch the position on a parent node based on its first | ||
function patchPosition(child, index, node) { | ||
var siblings = node.children; | ||
var siblings = node.children | ||
if (!child.position) { | ||
return; | ||
return | ||
} | ||
if (index === 0 && (!node.position || /* istanbul ignore next */ !node.position.start)) { | ||
patch(node); | ||
node.position.start = child.position.start; | ||
} | ||
if ( | ||
index === siblings.length - 1 && | ||
(!node.position || !node.position.end) | ||
index === 0 && | ||
(!node.position || /* istanbul ignore next */ !node.position.start) | ||
) { | ||
patch(node); | ||
node.position.end = child.position.end; | ||
patch(node) | ||
node.position.start = child.position.start | ||
} | ||
if (index === siblings.length - 1 && (!node.position || !node.position.end)) { | ||
patch(node) | ||
node.position.end = child.position.end | ||
} | ||
} | ||
@@ -35,4 +35,4 @@ | ||
if (!node.position) { | ||
node.position = {}; | ||
node.position = {} | ||
} | ||
} |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = modifyChildren(removeEmptyNodes); | ||
module.exports = modifyChildren(removeEmptyNodes) | ||
@@ -10,3 +10,3 @@ /* Remove empty children. */ | ||
if ('children' in child && child.children.length === 0) { | ||
parent.children.splice(index, 1); | ||
parent.children.splice(index, 1) | ||
@@ -16,4 +16,4 @@ /* Next, iterate over the node *now* at | ||
* next node). */ | ||
return index; | ||
return index | ||
} | ||
} |
@@ -1,6 +0,6 @@ | ||
'use strict'; | ||
'use strict' | ||
var toString = require('nlcst-to-string'); | ||
var toString = require('nlcst-to-string') | ||
module.exports = tokenizerFactory; | ||
module.exports = tokenizerFactory | ||
@@ -10,16 +10,16 @@ /* Factory to create a tokenizer based on a given | ||
function tokenizerFactory(childType, expression) { | ||
return tokenizer; | ||
return tokenizer | ||
/* A function that splits. */ | ||
function tokenizer(node) { | ||
var children = []; | ||
var tokens = node.children; | ||
var type = node.type; | ||
var length = tokens.length; | ||
var index = -1; | ||
var lastIndex = length - 1; | ||
var start = 0; | ||
var first; | ||
var last; | ||
var parent; | ||
var children = [] | ||
var tokens = node.children | ||
var type = node.type | ||
var length = tokens.length | ||
var index = -1 | ||
var lastIndex = length - 1 | ||
var start = 0 | ||
var first | ||
var last | ||
var parent | ||
@@ -29,9 +29,7 @@ while (++index < length) { | ||
index === lastIndex || | ||
( | ||
tokens[index].type === childType && | ||
expression.test(toString(tokens[index])) | ||
) | ||
(tokens[index].type === childType && | ||
expression.test(toString(tokens[index]))) | ||
) { | ||
first = tokens[start]; | ||
last = tokens[index]; | ||
first = tokens[start] | ||
last = tokens[index] | ||
@@ -41,3 +39,3 @@ parent = { | ||
children: tokens.slice(start, index + 1) | ||
}; | ||
} | ||
@@ -48,13 +46,13 @@ if (first.position && last.position) { | ||
end: last.position.end | ||
}; | ||
} | ||
} | ||
children.push(parent); | ||
children.push(parent) | ||
start = index + 1; | ||
start = index + 1 | ||
} | ||
} | ||
return children; | ||
return children | ||
} | ||
} |
{ | ||
"name": "parse-latin", | ||
"version": "4.1.0", | ||
"version": "4.1.1", | ||
"description": "Latin-script (natural language) parser", | ||
@@ -14,3 +14,3 @@ "license": "MIT", | ||
], | ||
"repository": "https://github.com/wooorm/parse-latin", | ||
"repository": "wooorm/parse-latin", | ||
"bugs": "https://github.com/wooorm/parse-latin/issues", | ||
@@ -31,3 +31,3 @@ "author": "Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)", | ||
"devDependencies": { | ||
"browserify": "^14.0.0", | ||
"browserify": "^16.0.0", | ||
"bundle-collapser": "^1.2.1", | ||
@@ -39,5 +39,6 @@ "esmangle": "^1.0.1", | ||
"nyc": "^11.0.0", | ||
"prettier": "^1.12.1", | ||
"regenerate": "^1.3.1", | ||
"remark-cli": "^4.0.0", | ||
"remark-preset-wooorm": "^3.0.0", | ||
"remark-cli": "^5.0.0", | ||
"remark-preset-wooorm": "^4.0.0", | ||
"tape": "^4.0.0", | ||
@@ -47,14 +48,13 @@ "unicode-8.0.0": "^0.7.0", | ||
"vfile": "^2.0.0", | ||
"xo": "^0.18.0" | ||
"xo": "^0.20.0" | ||
}, | ||
"scripts": { | ||
"build-md": "remark . -qfo", | ||
"build-expressions": "node script/build-expressions", | ||
"generate": "node script/build-expressions", | ||
"format": "remark . -qfo && prettier --write '**/*.js' && xo --fix", | ||
"build-bundle": "browserify index.js -p bundle-collapser/plugin --bare -s ParseLatin > parse-latin.js", | ||
"build-mangle": "esmangle parse-latin.js > parse-latin.min.js", | ||
"build": "npm run build-expressions && npm run build-md && npm run build-bundle && npm run build-mangle", | ||
"lint": "xo", | ||
"test-api": "node test/index.js", | ||
"build": "npm run build-bundle && npm run build-mangle", | ||
"test-api": "node test", | ||
"test-coverage": "nyc --reporter lcov tape test/index.js", | ||
"test": "npm run build && npm run lint && npm run test-coverage" | ||
"test": "npm run generate && npm run format && npm run build && npm run test-coverage" | ||
}, | ||
@@ -67,4 +67,12 @@ "nyc": { | ||
}, | ||
"prettier": { | ||
"tabWidth": 2, | ||
"useTabs": false, | ||
"singleQuote": true, | ||
"bracketSpacing": false, | ||
"semi": false, | ||
"trailingComma": "none" | ||
}, | ||
"xo": { | ||
"space": true, | ||
"prettier": true, | ||
"esnext": false, | ||
@@ -74,3 +82,8 @@ "rules": { | ||
"guard-for-in": "off", | ||
"max-depth": "off" | ||
"max-depth": "off", | ||
"no-var": "off", | ||
"object-shorthand": "off", | ||
"prefer-arrow-callback": "off", | ||
"unicorn/number-literal-case": "off", | ||
"unicorn/no-hex-escape": "off" | ||
}, | ||
@@ -77,0 +90,0 @@ "ignores": [ |
@@ -25,8 +25,8 @@ # parse-latin [![Build Status][travis-badge]][travis] [![Coverage Status][codecov-badge]][codecov] [![Chat][chat-badge]][chat] | ||
```javascript | ||
var inspect = require('unist-util-inspect'); | ||
var Latin = require('parse-latin'); | ||
var inspect = require('unist-util-inspect') | ||
var Latin = require('parse-latin') | ||
var tree = new Latin().parse('A simple sentence.'); | ||
var tree = new Latin().parse('A simple sentence.') | ||
console.log(inspect(tree)); | ||
console.log(inspect(tree)) | ||
``` | ||
@@ -33,0 +33,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No repository
Supply chain risk: Package does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No repository
Supply chain risk: Package does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
1287
61314
16