parse-english
Advanced tools
Comparing version 4.1.0 to 4.1.1
144
index.js
@@ -1,16 +0,16 @@ | ||
'use strict'; | ||
'use strict' | ||
var Parser = require('parse-latin'); | ||
var toString = require('nlcst-to-string'); | ||
var visitChildren = require('unist-util-visit-children'); | ||
var modifyChildren = require('unist-util-modify-children'); | ||
var Parser = require('parse-latin') | ||
var toString = require('nlcst-to-string') | ||
var visitChildren = require('unist-util-visit-children') | ||
var modifyChildren = require('unist-util-modify-children') | ||
module.exports = ParseEnglish; | ||
module.exports = ParseEnglish | ||
/* Inherit from `ParseLatin`. */ | ||
ParserPrototype.prototype = Parser.prototype; | ||
ParserPrototype.prototype = Parser.prototype | ||
var proto = new ParserPrototype(); | ||
var proto = new ParserPrototype() | ||
ParseEnglish.prototype = proto; | ||
ParseEnglish.prototype = proto | ||
@@ -20,7 +20,7 @@ /* Add modifiers to `parser`. */ | ||
visitChildren(mergeEnglishElisionExceptions) | ||
].concat(proto.tokenizeSentencePlugins); | ||
].concat(proto.tokenizeSentencePlugins) | ||
proto.tokenizeParagraphPlugins = [ | ||
modifyChildren(mergeEnglishPrefixExceptions) | ||
].concat(proto.tokenizeParagraphPlugins); | ||
].concat(proto.tokenizeParagraphPlugins) | ||
@@ -30,6 +30,6 @@ /* Transform English natural language into an NLCST-tree. */ | ||
if (!(this instanceof ParseEnglish)) { | ||
return new ParseEnglish(doc, file); | ||
return new ParseEnglish(doc, file) | ||
} | ||
Parser.apply(this, arguments); | ||
Parser.apply(this, arguments) | ||
} | ||
@@ -48,3 +48,2 @@ | ||
'inc|ltd|' + | ||
/* English unit abbreviations: | ||
@@ -61,3 +60,2 @@ * - Note that *Metric abbreviations* do not use | ||
'tsp|yds?|' + | ||
/* Abbreviations of time references: | ||
@@ -71,3 +69,3 @@ * seconds, minutes, hours, Monday, Tuesday, *, Wednesday, | ||
'apr|jun|jul|aug|sep|sept|oct|nov|dec' + | ||
')$' | ||
')$' | ||
/* | ||
@@ -77,3 +75,3 @@ * NOTE! There's no `i` flag here because the value to | ||
*/ | ||
); | ||
) | ||
@@ -89,3 +87,2 @@ /* Match a blacklisted (case-sensitive) abbreviation | ||
'Mr|Mrs|Miss|Ms|Mss|Mses|Mlle|Mme|M|Messrs|Mmes|Jr|Sr|Snr|' + | ||
/* Rank and academic: | ||
@@ -96,3 +93,2 @@ * Doctor, Magister, Attorney, Profesor, Honourable, Reverend, | ||
'Dr|Mgr|Atty|Prof|Hon|Rev|Fr|Msgr|Sr|Br|St|Pres|Supt|Rep|Sen|' + | ||
/* Rank and military: | ||
@@ -103,3 +99,2 @@ * Governor, Ambassador, Treasurer, Secretary, Admiral, Brigadier, | ||
'Gov|Amb|Treas|Sec|Amd|Brig|Gen|Cdr|Col|Capt|Lt|Maj|Sgt|Po|Wo|Ph|' + | ||
/* Common geographical abbreviations: | ||
@@ -112,3 +107,2 @@ * | ||
'Ft|Pen|Terr|Hwy|Fwy|Pkwy|' + | ||
/* American state abbreviations: | ||
@@ -126,3 +120,2 @@ * Alabama, Arizona, Arkansas, California, *, Colorado, *, | ||
'Wash|Wis|Wisc|Wyo|' + | ||
/* Canadian province abbreviations: | ||
@@ -132,3 +125,2 @@ * Alberta, Manitoba, Ontario, Quebec, *, Saskatchewan, | ||
'Alta|Man|Ont|Qu\u00E9|Que|Sask|Yuk|' + | ||
/* English county abbreviations: | ||
@@ -148,4 +140,4 @@ * Bedfordshire, Berkshire, Buckinghamshire, Cambridgeshire, | ||
'Staffs|Staf|Suff|Sy|Sx|Ssx|Warks|War|Warw|Westm|Wilts|Worcs|Yorks' + | ||
')$' | ||
); | ||
')$' | ||
) | ||
@@ -160,4 +152,4 @@ /* Match a blacklisted word which when followed by | ||
'o|ol' + | ||
')$' | ||
); | ||
')$' | ||
) | ||
@@ -174,3 +166,2 @@ /* Match a blacklisted word which when preceded by | ||
'im|er|em|cause|' + | ||
/* Includes: | ||
@@ -181,11 +172,10 @@ * - 'twas > it was; | ||
'twas|tis|twere|' + | ||
/* Matches groups of year, optionally followed | ||
* by an `s`. */ | ||
'\\d\\ds?' + | ||
')$' | ||
); | ||
')$' | ||
) | ||
/* Match one apostrophe. */ | ||
var APOSTROPHE = /^['\u2019]$/; | ||
var APOSTROPHE = /^['\u2019]$/ | ||
@@ -195,35 +185,35 @@ /* Merge a sentence into its next sentence, | ||
function mergeEnglishPrefixExceptions(sentence, index, paragraph) { | ||
var children = sentence.children; | ||
var period = children[children.length - 1]; | ||
var word = children[children.length - 2]; | ||
var value; | ||
var next; | ||
var children = sentence.children | ||
var period = children[children.length - 1] | ||
var word = children[children.length - 2] | ||
var value | ||
var next | ||
if (period && toString(period) === '.' && word && word.type === 'WordNode') { | ||
value = toString(word); | ||
value = toString(word) | ||
if (ABBREVIATION.test(lower(value)) || ABBREVIATION_SENSITIVE.test(value)) { | ||
/* Merge period into abbreviation. */ | ||
word.children.push(period); | ||
children.pop(); | ||
word.children.push(period) | ||
children.pop() | ||
if (period.position && word.position) { | ||
word.position.end = period.position.end; | ||
word.position.end = period.position.end | ||
} | ||
/* Merge sentences. */ | ||
next = paragraph.children[index + 1]; | ||
next = paragraph.children[index + 1] | ||
if (next) { | ||
sentence.children = children.concat(next.children); | ||
sentence.children = children.concat(next.children) | ||
paragraph.children.splice(index + 1, 1); | ||
paragraph.children.splice(index + 1, 1) | ||
/* Update position. */ | ||
if (next.position && sentence.position) { | ||
sentence.position.end = next.position.end; | ||
sentence.position.end = next.position.end | ||
} | ||
/* Next, iterate over the current node again. */ | ||
return index - 1; | ||
return index - 1 | ||
} | ||
@@ -237,31 +227,31 @@ } | ||
function mergeEnglishElisionExceptions(child, index, sentence) { | ||
var siblings; | ||
var sibling; | ||
var other; | ||
var length; | ||
var value; | ||
var siblings | ||
var sibling | ||
var other | ||
var length | ||
var value | ||
if (child.type !== 'PunctuationNode' && child.type !== 'SymbolNode') { | ||
return; | ||
return | ||
} | ||
siblings = sentence.children; | ||
length = siblings.length; | ||
value = toString(child); | ||
siblings = sentence.children | ||
length = siblings.length | ||
value = toString(child) | ||
/* Match abbreviation of `with`, `w/` */ | ||
if (value === '/') { | ||
sibling = siblings[index - 1]; | ||
sibling = siblings[index - 1] | ||
if (sibling && lower(toString(sibling)) === 'w') { | ||
/* Remove the slash from the sentence. */ | ||
siblings.splice(index, 1); | ||
siblings.splice(index, 1) | ||
/* Append the slash into the children of the | ||
* previous node. */ | ||
sibling.children.push(child); | ||
sibling.children.push(child) | ||
/* Update position. */ | ||
if (sibling.position && child.position) { | ||
sibling.position.end = child.position.end; | ||
sibling.position.end = child.position.end | ||
} | ||
@@ -273,3 +263,3 @@ } | ||
* nodes exist... */ | ||
sibling = siblings[index - 1]; | ||
sibling = siblings[index - 1] | ||
@@ -285,14 +275,14 @@ if ( | ||
/* Remove the apostrophe from the sentence. */ | ||
siblings.splice(index, 1); | ||
siblings.splice(index, 1) | ||
/* Append the apostrophe into the children of | ||
* node. */ | ||
sibling.children.push(child); | ||
sibling.children.push(child) | ||
/* Update position. */ | ||
if (sibling.position && child.position) { | ||
sibling.position.end = child.position.end; | ||
sibling.position.end = child.position.end | ||
} | ||
return; | ||
return | ||
} | ||
@@ -307,18 +297,18 @@ | ||
) { | ||
sibling = siblings[index + 1]; | ||
value = lower(toString(sibling)); | ||
sibling = siblings[index + 1] | ||
value = lower(toString(sibling)) | ||
if (ELISION_AFFIX.test(value)) { | ||
/* Remove the apostrophe from the sentence. */ | ||
siblings.splice(index, 1); | ||
siblings.splice(index, 1) | ||
/* Prepend the apostrophe into the children of | ||
* node. */ | ||
sibling.children = [child].concat(sibling.children); | ||
sibling.children = [child].concat(sibling.children) | ||
/* Update position. */ | ||
if (sibling.position && child.position) { | ||
sibling.position.start = child.position.start; | ||
sibling.position.start = child.position.start | ||
} | ||
/* If both preceded and followed by an apostrophe, | ||
/* If both preceded and followed by an apostrophe, | ||
* and the word is `n`... */ | ||
@@ -330,7 +320,7 @@ } else if ( | ||
) { | ||
other = siblings[index + 2]; | ||
other = siblings[index + 2] | ||
/* Remove the apostrophe from the sentence. */ | ||
siblings.splice(index, 1); | ||
siblings.splice(index + 1, 1); | ||
siblings.splice(index, 1) | ||
siblings.splice(index + 1, 1) | ||
@@ -340,3 +330,3 @@ /* Prepend the preceding apostrophe and append | ||
* the children of node. */ | ||
sibling.children = [child].concat(sibling.children, other); | ||
sibling.children = [child].concat(sibling.children, other) | ||
@@ -347,3 +337,3 @@ /* Update position. */ | ||
if (child.position) { | ||
sibling.position.start = child.position.start; | ||
sibling.position.start = child.position.start | ||
} | ||
@@ -353,3 +343,3 @@ | ||
if (other.position) { | ||
sibling.position.end = other.position.end; | ||
sibling.position.end = other.position.end | ||
} | ||
@@ -363,3 +353,3 @@ } | ||
function lower(value) { | ||
return value.toLowerCase(); | ||
return value.toLowerCase() | ||
} |
{ | ||
"name": "parse-english", | ||
"version": "4.1.0", | ||
"version": "4.1.1", | ||
"description": "English natural language parser", | ||
@@ -13,3 +13,3 @@ "license": "MIT", | ||
], | ||
"repository": "https://github.com/wooorm/parse-english", | ||
"repository": "wooorm/parse-english", | ||
"bugs": "https://github.com/wooorm/parse-english/issues", | ||
@@ -30,3 +30,3 @@ "author": "Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)", | ||
"devDependencies": { | ||
"browserify": "^14.0.0", | ||
"browserify": "^16.0.0", | ||
"bundle-collapser": "^1.2.1", | ||
@@ -38,18 +38,18 @@ "esmangle": "^1.0.1", | ||
"nyc": "^11.0.0", | ||
"remark-cli": "^4.0.0", | ||
"remark-preset-wooorm": "^3.0.0", | ||
"prettier": "^1.12.1", | ||
"remark-cli": "^5.0.0", | ||
"remark-preset-wooorm": "^4.0.0", | ||
"tape": "^4.0.0", | ||
"unist-util-remove-position": "^1.1.0", | ||
"vfile": "^2.0.0", | ||
"xo": "^0.18.0" | ||
"xo": "^0.20.0" | ||
}, | ||
"scripts": { | ||
"build-md": "remark . -qfo", | ||
"format": "remark . -qfo && prettier --write '**/*.js' && xo --fix", | ||
"build-bundle": "browserify index.js -p bundle-collapser/plugin --bare -s ParseEnglish > parse-english.js", | ||
"build-mangle": "esmangle parse-english.js > parse-english.min.js", | ||
"build": "npm run build-md && npm run build-bundle && npm run build-mangle", | ||
"lint": "xo", | ||
"build": "npm run build-bundle && npm run build-mangle", | ||
"test-api": "node test", | ||
"test-coverage": "nyc --reporter lcov tape test/index.js", | ||
"test": "npm run build && npm run lint && npm run test-coverage" | ||
"test": "npm run format && npm run build && npm run test-coverage" | ||
}, | ||
@@ -62,7 +62,18 @@ "nyc": { | ||
}, | ||
"prettier": { | ||
"tabWidth": 2, | ||
"useTabs": false, | ||
"singleQuote": true, | ||
"bracketSpacing": false, | ||
"semi": false, | ||
"trailingComma": "none" | ||
}, | ||
"xo": { | ||
"space": true, | ||
"prettier": true, | ||
"esnext": false, | ||
"rules": { | ||
"max-depth": "off" | ||
"max-depth": "off", | ||
"complexity": "off", | ||
"no-var": "off", | ||
"prefer-arrow-callback": "off" | ||
}, | ||
@@ -69,0 +80,0 @@ "ignores": [ |
@@ -17,8 +17,10 @@ # parse-english [![Build Status][travis-badge]][travis] [![Coverage Status][codecov-badge]][codecov] [![Chat][chat-badge]][chat] | ||
```javascript | ||
var inspect = require('unist-util-inspect'); | ||
var English = require('parse-english'); | ||
var inspect = require('unist-util-inspect') | ||
var English = require('parse-english') | ||
var tree = new English().parse('Mr. Henry Brown: A hapless but friendly City of London worker.'); | ||
var tree = new English().parse( | ||
'Mr. Henry Brown: A hapless but friendly City of London worker.' | ||
) | ||
console.log(inspect(tree)); | ||
console.log(inspect(tree)) | ||
``` | ||
@@ -25,0 +27,0 @@ |
No repository
Supply chain risk: Package does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
No repository
Supply chain risk: Package does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
17993
121
14