compromise
Advanced tools
Comparing version 14.10.0 to 14.10.1
@@ -5,3 +5,3 @@ { | ||
"description": "modest natural language processing", | ||
"version": "14.10.0", | ||
"version": "14.10.1", | ||
"module": "./src/three.js", | ||
@@ -80,3 +80,3 @@ "main": "./src/three.js", | ||
"flame": "clinic flame -- node ./scripts/perf/flame", | ||
"lint": "eslint ./src/**/* && eslint ./plugins/**/src/*", | ||
"lint": "eslint ./src/**/*", | ||
"plugins:ci": "node ./scripts/plugins.js npm ci", | ||
@@ -111,3 +111,3 @@ "plugins:build": "node ./scripts/plugins.js npm run build" | ||
"cross-env": "^7.0.3", | ||
"eslint": "8.46.0", | ||
"eslint": "8.53.0", | ||
"eslint-plugin-regexp": "1.15.0", | ||
@@ -119,3 +119,3 @@ "nlp-corpus": "4.4.0", | ||
"tap-dancer": "0.3.4", | ||
"tape": "5.6.6" | ||
"tape": "5.7.2" | ||
}, | ||
@@ -122,0 +122,0 @@ "eslintIgnore": [ |
@@ -42,3 +42,2 @@ <div align="center"> | ||
<div align="left"> | ||
@@ -53,4 +52,5 @@ don't you find it strange, | ||
<i>↬<sub>ᔐᖜ</sub><b>↬</b></i> <sub></sub> | ||
and how hard it is to actually <b>parse</b> and <i>use</i>? | ||
<i>↬<sub>ᔐᖜ</sub><b>↬</b></i> <sub></sub> | ||
and how hard it is to actually <b>parse</b> and <i>use</i>? | ||
</ul> | ||
@@ -73,3 +73,2 @@ </div> | ||
<!-- | ||
@@ -84,3 +83,2 @@ it is | ||
```js | ||
@@ -112,5 +110,4 @@ import nlp from 'compromise' | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221814-05ed1680-ffb8-11e9-8b6b-c7528d163871.png"/> | ||
</div> | ||
</div> | ||
<div align="left"> | ||
@@ -125,2 +122,3 @@ <i>grab parts of the text:</i> | ||
``` | ||
<div align="right"> | ||
@@ -136,2 +134,3 @@ <a href="https://docs.compromise.cool/compromise-match">match docs</a> | ||
<i>and get data:</i> | ||
```js | ||
@@ -154,2 +153,3 @@ import plg from 'compromise-speech' | ||
``` | ||
<div align="right"> | ||
@@ -160,6 +160,6 @@ <a href="https://docs.compromise.cool/compromise-json">json docs</a> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221814-05ed1680-ffb8-11e9-8b6b-c7528d163871.png"/> | ||
</div> | ||
</div> | ||
<!-- spacer --> | ||
<img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
<img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
@@ -176,3 +176,3 @@ avoid the problems of brittle parsers: | ||
doc.contractions().expand() | ||
dox.text() | ||
doc.text() | ||
// 'we are not going to take it..' | ||
@@ -191,2 +191,3 @@ ``` | ||
and whip stuff around like it's data: | ||
```js | ||
@@ -209,3 +210,2 @@ let doc = nlp('ninety five thousand and fifty two') | ||
<sub>-because it actually is-</sub> | ||
@@ -227,3 +227,2 @@ | ||
<!-- spacer --> | ||
@@ -253,6 +252,6 @@ <img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
``` | ||
<img height="75px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
<!-- | ||
<!-- | ||
bragging graphs | ||
@@ -293,4 +292,3 @@ --> | ||
<!-- | ||
<!-- | ||
one/two/three parts | ||
@@ -325,3 +323,4 @@ --> | ||
<b>compromise/one</b> splits your text up, wraps it in a handy API, | ||
<b>compromise/one</b> splits your text up, wraps it in a handy API, | ||
<ul> | ||
@@ -333,7 +332,7 @@ <sub>and does nothing else -</sub> | ||
<b>/one</b> is quick - most sentences take a 10th of a millisecond. | ||
<b>/one</b> is quick - most sentences take a 10th of a millisecond. | ||
It can do <b>~1mb</b> of text a second - or 10 wikipedia pages. | ||
<i>Infinite jest</i> is takes 3s. | ||
<i>Infinite jest</i> is takes 3s. | ||
@@ -379,6 +378,4 @@ <div align="right"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
compromise has <b>83 tags</b>, arranged in <a href="https://observablehq.com/@spencermountain/compromise-tags">a handsome graph</a>. | ||
@@ -393,2 +390,3 @@ | ||
if you prefer <a href="https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html"><i>Penn tags</i></a>, you can derive them with: | ||
```js | ||
@@ -418,2 +416,3 @@ let doc = nlp('welcome thrillho') | ||
``` | ||
<div align="right"> | ||
@@ -423,3 +422,2 @@ <a href="https://docs.compromise.cool/compromise-selections">selection docs</a> | ||
<b>compromise/three</b> is a set of tooling to <i>zoom into</i> and operate on parts of a text. | ||
@@ -430,4 +428,5 @@ | ||
When you have a phrase, or group of words, you can see additional metadata about it with `.json()` | ||
```js | ||
let doc = nlp("four out of five dentists") | ||
let doc = nlp('four out of five dentists') | ||
console.log(doc.fractions().json()) | ||
@@ -443,3 +442,3 @@ /*[{ | ||
```js | ||
let doc = nlp("$4.09CAD") | ||
let doc = nlp('$4.09CAD') | ||
doc.money().json() | ||
@@ -456,3 +455,2 @@ /*[{ | ||
## API | ||
@@ -574,3 +572,2 @@ | ||
##### Lib | ||
@@ -600,6 +597,6 @@ | ||
### compromise/two: | ||
##### Contractions | ||
- **[.contractions()](https://observablehq.com/@spencermountain/compromise-contractions)** - things like "didn't" | ||
@@ -609,10 +606,9 @@ - **[.contractions().expand()](https://observablehq.com/@spencermountain/compromise-contractions)** - things like "didn't" | ||
<!-- spacer --> | ||
<img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
### compromise/three: | ||
##### Nouns | ||
- **[.nouns()](https://observablehq.com/@spencermountain/nouns)** - return any subsequent terms tagged as a Noun | ||
@@ -628,2 +624,3 @@ - **[.nouns().json()](https://observablehq.com/@spencermountain/nouns)** - overloaded output with noun metadata | ||
##### Verbs | ||
- **[.verbs()](https://observablehq.com/@spencermountain/verbs)** - return any subsequent terms tagged as a Verb | ||
@@ -650,2 +647,3 @@ - **[.verbs().json()](https://observablehq.com/@spencermountain/verbs)** - overloaded output with verb metadata | ||
##### Numbers | ||
- **[.numbers()](https://observablehq.com/@spencermountain/compromise-values)** - grab all written and numeric values | ||
@@ -689,2 +687,3 @@ - **[.numbers().parse()](https://observablehq.com/@spencermountain/compromise-values)** - get tokenized number phrase | ||
##### Sentences | ||
- **[.sentences()](https://observablehq.com/@spencermountain/compromise-sentences)** - return a sentence class with additional methods | ||
@@ -703,2 +702,3 @@ - **[.sentences().json()](https://observablehq.com/@spencermountain/compromise-sentences)** - overloaded output with sentence metadata | ||
##### Adjectives | ||
- **[.adjectives()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'quick'` | ||
@@ -741,3 +741,3 @@ - **[.adjectives().json()](https://observablehq.com/@spencermountain/compromise-selections)** - get adjective metadata | ||
- **[.parentheses()](https://observablehq.com/@spencermountain/compromise-selections)** - return anything inside (parentheses) | ||
- **[.parentheses().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - remove brackets | ||
- **[.parentheses().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - remove brackets | ||
- **[.possessives()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `"Spencer's"` | ||
@@ -756,7 +756,5 @@ - **[.possessives().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - "Spencer's" -> "Spencer" | ||
### .extend(): | ||
This library comes with a considerate, common-sense baseline for english grammar. | ||
This library comes with a considerate, common-sense baseline for english grammar. | ||
@@ -766,2 +764,3 @@ You're free to change, or lay-waste to any settings - which is the fun part actually. | ||
the easiest part is just to suggest tags for any given words: | ||
```js | ||
@@ -796,7 +795,7 @@ let myWords = { | ||
pastTense: 'gotten', | ||
gerund: 'gettin' | ||
gerund: 'gettin', | ||
}, | ||
}, | ||
// add new methods to compromise | ||
api: (View) => { | ||
api: View => { | ||
View.prototype.kermitVoice = function () { | ||
@@ -807,3 +806,3 @@ this.sentences().prepend('well,') | ||
} | ||
} | ||
}, | ||
}) | ||
@@ -894,6 +893,6 @@ ``` | ||
##### Comparisons | ||
- [Compromise and Spacy](https://observablehq.com/@spencermountain/compromise-and-spacy) | ||
- [Compromise and NLTK](https://observablehq.com/@spencermountain/compromise-and-NLTK) | ||
<!-- spacer --> | ||
@@ -908,3 +907,2 @@ <div align="center"> | ||
<!-- <div align="center"> | ||
@@ -935,3 +933,2 @@ <img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/> | ||
##### Stats | ||
@@ -964,3 +961,2 @@ | ||
<!-- spacer --> | ||
@@ -967,0 +963,0 @@ <div > |
@@ -1,1 +0,1 @@ | ||
export default '14.10.0' | ||
export default '14.10.1' |
@@ -71,6 +71,6 @@ import debug from './debug/index.js' | ||
let list = [] | ||
this.docs.forEach(s => { | ||
let terms = s.terms.map(t => t.text) | ||
terms = terms.filter(t => t) | ||
list = list.concat(terms) | ||
this.docs.forEach(terms => { | ||
let words = terms.map(t => t.text) | ||
words = words.filter(t => t) | ||
list = list.concat(words) | ||
}) | ||
@@ -77,0 +77,0 @@ return list |
const isAcronym = /[ .][A-Z]\.? *$/i //asci - 'n.s.a.' | ||
const hasEllipse = /(?:\u2026|\.{2,}) *$/ // '...' | ||
const hasLetter = /\p{L}/u | ||
const hasPeriod = /\. *$/ | ||
const leadInit = /^[A-Z]\. $/ // "W. Kensington" | ||
@@ -27,4 +28,4 @@ | ||
let lastWord = words[words.length - 1].toLowerCase() | ||
// check for 'Mr.' | ||
if (abbrevs.hasOwnProperty(lastWord) === true) { | ||
// check for 'Mr.' (and not mr?) | ||
if (abbrevs.hasOwnProperty(lastWord) === true && hasPeriod.test(str) === true) { | ||
return false | ||
@@ -31,0 +32,0 @@ } |
@@ -15,3 +15,3 @@ //a hugely-ignorant, and widely subjective transliteration of latin, cryllic, greek unicode characters to english ascii. | ||
d: 'ÐĎďĐđƉƊȡƋƌ', | ||
e: 'ÈÉÊËèéêëĒēĔĕĖėĘęĚěƐȄȅȆȇȨȩɆɇΈΕΞΣέεξϵЀЁЕеѐёҼҽҾҿӖӗ', | ||
e: 'ÈÉÊËèéêëĒēĔĕĖėĘęĚěƐȄȅȆȇȨȩɆɇΈΕΞΣέεξϵЀЁЕеѐёҼҽҾҿӖӗễ', | ||
f: 'ƑƒϜϝӺӻҒғſ', | ||
@@ -21,3 +21,3 @@ g: 'ĜĝĞğĠġĢģƓǤǥǦǧǴǵ', | ||
I: 'ÌÍÎÏ', | ||
i: 'ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії', | ||
i: 'ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇіїi̇', | ||
j: 'ĴĵǰȷɈɉϳЈј', | ||
@@ -48,2 +48,2 @@ k: 'ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ', | ||
}) | ||
export default unicode | ||
export default unicode |
@@ -45,3 +45,3 @@ import splice from './_splice.js' | ||
if (isPossessive(terms, i)) { | ||
return world.methods.one.setTag([terms[i]], 'Possessive', world, '2-contraction') | ||
return world.methods.one.setTag([terms[i]], 'Possessive', world, null, '2-contraction') | ||
} | ||
@@ -58,5 +58,4 @@ return apostropheS(terms, i) | ||
//really easy ones | ||
const contractionTwo = (view) => { | ||
const contractionTwo = view => { | ||
let { world, document } = view | ||
@@ -73,3 +72,3 @@ // each sentence | ||
if (byApostrophe.test(terms[i].normal) === true) { | ||
[, after] = terms[i].normal.split(byApostrophe) | ||
after = terms[i].normal.split(byApostrophe)[1] | ||
} | ||
@@ -76,0 +75,0 @@ let words = null |
@@ -33,2 +33,4 @@ export default [ | ||
{ match: `[(march|may)] #Adverb`, group: 0, tag: 'Verb', reason: 'march-quickly' }, | ||
//12 am | ||
{ match: `#Value (am|pm)`, tag: 'Time', reason: '2-am' }, | ||
] |
@@ -32,3 +32,3 @@ import adj from './adjective/adjective.js' | ||
import orgs from './orgs/organizations.js' | ||
import orgs from './nouns/organizations.js' | ||
import places from './nouns/places.js' | ||
@@ -66,3 +66,3 @@ import conjunctions from './conjunctions.js' | ||
conjunctions, | ||
expressions, | ||
expressions | ||
) | ||
@@ -69,0 +69,0 @@ export default { |
export default [ | ||
// ==== Region ==== | ||
//West Norforlk | ||
// West Norforlk | ||
{ match: '(west|north|south|east|western|northern|southern|eastern)+ #Place', tag: 'Region', reason: 'west-norfolk' }, | ||
//some us-state acronyms (exlude: al, in, la, mo, hi, me, md, ok..) | ||
{ match: '#City [(al|ak|az|ar|ca|ct|dc|fl|ga|id|il|nv|nh|nj|ny|oh|pa|sc|tn|tx|ut|vt|pr)]', group: 0, tag: 'Region', reason: 'us-state' }, | ||
{ | ||
match: '#City [(al|ak|az|ar|ca|ct|dc|fl|ga|id|il|nv|nh|nj|ny|oh|pa|sc|tn|tx|ut|vt|pr)]', | ||
group: 0, | ||
tag: 'Region', | ||
reason: 'us-state', | ||
}, | ||
// portland oregon | ||
{ match: 'portland [or]', group: 0, tag: 'Region', reason: 'portland-or' }, | ||
//Foo District | ||
{ match: '#ProperNoun+ (district|region|province|county|prefecture|municipality|territory|burough|reservation)', tag: 'Region', reason: 'foo-district' }, | ||
//landforms - 'Foo river' | ||
{ match: '#ProperNoun+ (river|lake|bay|inlet|creek|narrows|cove|dune|coast|lagoon|beach|peninsula|hill|mountain|canyon|marsh|island|trail|valley|glacier|estuary|desert|escarpment|gorge|plains|waterfall)', tag: 'Place', reason: 'foo-river' }, | ||
//landforms - 'gulf of foo' | ||
{ match: '(river|gulf|lake) of? #ProperNoun+', tag: 'Place', reason: 'river-foo' }, | ||
//District of Foo | ||
{ match: '(district|region|province|municipality|territory|burough|state) of #ProperNoun', tag: 'Region', reason: 'district-of-Foo' }, | ||
//words removed from preTagger/placeWords | ||
{ | ||
match: '#ProperNoun+ (cliff|place|range|pit|place|point|room|grounds|ruins)', | ||
tag: 'Place', | ||
reason: 'foo-point', | ||
}, | ||
// in Foo California | ||
{ match: 'in [#ProperNoun] #Place', group: 0, tag: 'Place', reason: 'propernoun-place' }, | ||
// Address | ||
{ match: '#Value #Noun (st|street|rd|road|crescent|cr|way|tr|terrace|avenue|ave)', tag: 'Address', reason: 'address-st' }, | ||
// Address | ||
{ | ||
match: '#Value #Noun (st|street|rd|road|crescent|cr|way|tr|terrace|avenue|ave)', | ||
tag: 'Address', | ||
reason: 'address-st', | ||
}, | ||
// Sports Arenas and Complexs | ||
// { | ||
// match: | ||
// '(#Place+|#Place|#ProperNoun) (memorial|athletic|community|financial)? (sportsplex|stadium|sports centre|sports field|soccer complex|soccer centre|sports complex|civic centre|centre|arena|gardens|complex|coliseum|auditorium|place|building)', | ||
// tag: 'Place', | ||
// reason: 'sport-complex', | ||
// }, | ||
] |
// const personAdj = '(misty|rusty|dusty|rich|randy|sandy|young|earnest|frank|brown)' | ||
export default [ | ||
// ebenezer scrooge | ||
{ | ||
match: '#FirstName #Noun$', | ||
tag: '. #LastName', | ||
notIf: '(#Possessive|#Organization|#Place|#Pronoun|@hasTitleCase)', | ||
reason: 'firstname-noun', | ||
}, | ||
// ===person-date=== | ||
@@ -15,7 +23,17 @@ { match: '%Person|Date% #Acronym? #ProperNoun', tag: 'Person', reason: 'jan-thierson' }, | ||
// chuck will ... | ||
{ match: `[%Person|Verb%] (will|had|has|said|says|told|did|learned|wants|wanted)`, group: 0, tag: 'Person', reason: 'person-said' }, | ||
{ | ||
match: `[%Person|Verb%] (will|had|has|said|says|told|did|learned|wants|wanted)`, | ||
group: 0, | ||
tag: 'Person', | ||
reason: 'person-said', | ||
}, | ||
// ===person-place=== | ||
//sydney harbour | ||
{ match: `[%Person|Place%] (harbor|harbour|pier|town|city|place|dump|landfill)`, group: 0, tag: 'Place', reason: 'sydney-harbour' }, | ||
{ | ||
match: `[%Person|Place%] (harbor|harbour|pier|town|city|place|dump|landfill)`, | ||
group: 0, | ||
tag: 'Place', | ||
reason: 'sydney-harbour', | ||
}, | ||
// east sydney | ||
@@ -48,3 +66,9 @@ { match: `(west|east|north|south) [%Person|Place%]`, group: 0, tag: 'Place', reason: 'east-sydney' }, | ||
// jack layton won | ||
{ match: '(#FirstName && !#Possessive) [#Singular] #Verb', group: 0, safe: true, tag: 'LastName', reason: 'jack-layton' }, | ||
{ | ||
match: '(#FirstName && !#Possessive) [#Singular] #Verb', | ||
group: 0, | ||
safe: true, | ||
tag: 'LastName', | ||
reason: 'jack-layton', | ||
}, | ||
// sherwood anderson told | ||
@@ -54,3 +78,2 @@ { match: '^[#Singular] #Person #Verb', group: 0, safe: true, tag: 'Person', reason: 'sherwood-anderson' }, | ||
{ match: '(a|an) [#Person]$', group: 0, unTag: 'Person', reason: 'a-warhol' }, | ||
] |
@@ -6,3 +6,8 @@ export default [ | ||
//pope francis | ||
{ match: '(sister|pope|brother|father|aunt|uncle|grandpa|grandfather|grandma) #ProperNoun', tag: 'Person', reason: 'lady-titlecase', safe: true }, | ||
{ | ||
match: '(sister|pope|brother|father|aunt|uncle|grandpa|grandfather|grandma) #ProperNoun', | ||
tag: 'Person', | ||
reason: 'lady-titlecase', | ||
safe: true, | ||
}, | ||
@@ -31,3 +36,3 @@ // ==== Nickname ==== | ||
//saint Foo | ||
{ match: '(king|queen|prince|saint) #ProperNoun', tag: 'Person', reason: 'saint-foo' }, | ||
{ match: '(king|queen|prince|saint) #ProperNoun', tag: 'Person', notIf: '#Place', reason: 'saint-foo' }, | ||
@@ -52,3 +57,3 @@ // al sharpton | ||
//jose de Sucre | ||
{ match: '#ProperNoun (de|du) la? #ProperNoun', tag: 'Person', reason: 'title-de-title' }, | ||
{ match: '#ProperNoun (de|du) la? #ProperNoun', tag: 'Person', notIf: '#Place', reason: 'title-de-title' }, | ||
//Jani K. Smith | ||
@@ -59,5 +64,18 @@ { match: '#Singular #Acronym #LastName', tag: '#FirstName #Person .', reason: 'title-acro-noun', safe: true }, | ||
// john keith jones | ||
{ match: '#Person [#ProperNoun #ProperNoun]', group: 0, tag: 'Person', notIf: '#Possessive', reason: 'three-name-person', safe: true }, | ||
{ | ||
match: '#Person [#ProperNoun #ProperNoun]', | ||
group: 0, | ||
tag: 'Person', | ||
notIf: '#Possessive', | ||
reason: 'three-name-person', | ||
safe: true, | ||
}, | ||
//John Foo | ||
{ match: '#FirstName #Acronym? [#ProperNoun]', group: 0, tag: 'LastName', notIf: '#Possessive', reason: 'firstname-titlecase' }, | ||
{ | ||
match: '#FirstName #Acronym? [#ProperNoun]', | ||
group: 0, | ||
tag: 'LastName', | ||
notIf: '#Possessive', | ||
reason: 'firstname-titlecase', | ||
}, | ||
// john stewart | ||
@@ -73,9 +91,32 @@ { match: '#FirstName [#FirstName]', group: 0, tag: 'LastName', reason: 'firstname-firstname' }, | ||
// sergeant major Harold | ||
{ match: '[(lieutenant|corporal|sergeant|captain|qeen|king|admiral|major|colonel|marshal|president|queen|king)+] #ProperNoun', group: 0, tag: 'Honorific', reason: 'seargeant-john' }, | ||
{ | ||
match: | ||
'[(lieutenant|corporal|sergeant|captain|qeen|king|admiral|major|colonel|marshal|president|queen|king)+] #ProperNoun', | ||
group: 0, | ||
tag: 'Honorific', | ||
reason: 'seargeant-john', | ||
}, | ||
// ==== Honorics ==== | ||
{ match: '[(private|general|major|rear|prime|field|count|miss)] #Honorific? #Person', group: 0, tag: ['Honorific', 'Person'], reason: 'ambg-honorifics' }, | ||
{ | ||
match: '[(private|general|major|rear|prime|field|count|miss)] #Honorific? #Person', | ||
group: 0, | ||
tag: ['Honorific', 'Person'], | ||
reason: 'ambg-honorifics', | ||
}, | ||
// dr john foobar | ||
{ match: '#Honorific #FirstName [#Singular]', group: 0, tag: 'LastName', notIf: '#Possessive', reason: 'dr-john-foo', safe: true }, | ||
{ | ||
match: '#Honorific #FirstName [#Singular]', | ||
group: 0, | ||
tag: 'LastName', | ||
notIf: '#Possessive', | ||
reason: 'dr-john-foo', | ||
safe: true, | ||
}, | ||
//his-excellency | ||
{ match: '[(his|her) (majesty|honour|worship|excellency|honorable)] #Person', group: 0, tag: 'Honorific', reason: 'his-excellency' }, | ||
{ | ||
match: '[(his|her) (majesty|honour|worship|excellency|honorable)] #Person', | ||
group: 0, | ||
tag: 'Honorific', | ||
reason: 'his-excellency', | ||
}, | ||
// Lieutenant colonel | ||
@@ -82,0 +123,0 @@ { match: '#Honorific #Actor', tag: 'Honorific', reason: 'Lieutenant colonel' }, |
@@ -10,3 +10,3 @@ import colons from './1st-pass/01-colons.js' | ||
import checkYear from './2nd-pass/05-year.js' | ||
import verbType from './3rd-pass/06-verb-type.js' | ||
import verbType from './3rd-pass/07-verb-type.js' | ||
@@ -17,5 +17,6 @@ import fillTags from './3rd-pass/_fillTags.js' | ||
import orgWords from './3rd-pass/03-orgWords.js' | ||
import nounFallback from './3rd-pass/04-fallback.js' | ||
import placeWords from './3rd-pass/04-placeWords.js' | ||
import nounFallback from './3rd-pass/05-fallback.js' | ||
import switches from './3rd-pass/06-switches.js' | ||
import imperative from './3rd-pass/07-imperative.js' | ||
import imperative from './3rd-pass/08-imperative.js' | ||
@@ -37,7 +38,2 @@ // is it all yelling-case? | ||
colons(terms, 0, model, world) | ||
// for (let i = 0; i < terms.length; i += 1) { | ||
// hard-nosed, faith-based | ||
// hyphens(terms, i, model, world) | ||
// } | ||
}) | ||
@@ -81,2 +77,4 @@ } | ||
orgWords(terms, i, world, isYelling) | ||
// Wawel Castle | ||
placeWords(terms, i, world, isYelling) | ||
// verb-noun disambiguation, etc | ||
@@ -83,0 +81,0 @@ switches(terms, i, world) |
//similar to plural/singularize rules, but not the same | ||
const isPlural = { | ||
e: [ | ||
'mice', | ||
'louse', | ||
'antennae', | ||
'formulae', | ||
'nebulae', | ||
'vertebrae', | ||
'vitae', | ||
], | ||
i: [ | ||
'tia', | ||
'octopi', | ||
'viri', | ||
'radii', | ||
'nuclei', | ||
'fungi', | ||
'cacti', | ||
'stimuli', | ||
], | ||
n: [ | ||
'men', | ||
], | ||
t: [ | ||
'feet' | ||
] | ||
e: ['mice', 'louse', 'antennae', 'formulae', 'nebulae', 'vertebrae', 'vitae'], | ||
i: ['tia', 'octopi', 'viri', 'radii', 'nuclei', 'fungi', 'cacti', 'stimuli'], | ||
n: ['men'], | ||
t: ['feet'], | ||
} | ||
@@ -37,2 +16,3 @@ // plural words as exceptions to suffix-rules | ||
'menus', | ||
'logos', | ||
]) | ||
@@ -42,31 +22,36 @@ | ||
'bus', | ||
'mas',//christmas | ||
'mas', //christmas | ||
'was', | ||
// 'las', | ||
'ias',//alias | ||
'ias', //alias | ||
'xas', | ||
'vas', | ||
'cis',//probocis | ||
'cis', //probocis | ||
'lis', | ||
'nis',//tennis | ||
'nis', //tennis | ||
'ois', | ||
'ris', | ||
'sis',//thesis | ||
'tis',//mantis, testis | ||
'sis', //thesis | ||
'tis', //mantis, testis | ||
'xis', | ||
'aus', | ||
'cus', | ||
'eus',//nucleus | ||
'fus',//doofus | ||
'gus',//fungus | ||
'ius',//radius | ||
'lus',//stimulus | ||
'eus', //nucleus | ||
'fus', //doofus | ||
'gus', //fungus | ||
'ius', //radius | ||
'lus', //stimulus | ||
'nus', | ||
'das', | ||
'ous', | ||
'pus',//octopus | ||
'rus',//virus | ||
'sus',//census | ||
'tus',//status,cactus | ||
'pus', //octopus | ||
'rus', //virus | ||
'sus', //census | ||
'tus', //status,cactus | ||
'xus', | ||
'\'s', | ||
'aos', //chaos | ||
'igos', | ||
'ados', //barbados | ||
'ogos', | ||
"'s", | ||
'ss', | ||
@@ -73,0 +58,0 @@ ] |
@@ -13,2 +13,3 @@ import irregularPlurals from './irregulars/plurals.js' | ||
import orgWords from './orgWords.js' | ||
import placeWords from './placeWords.js' | ||
import expandLexicon from './_expand/index.js' | ||
@@ -40,4 +41,4 @@ | ||
orgWords, | ||
placeWords, | ||
}, | ||
} | ||
@@ -47,2 +48,2 @@ model = expandLexicon(model) | ||
// console.log(model.one.lexicon.see) | ||
// console.log(model.one.lexicon.see) |
@@ -88,2 +88,3 @@ //nouns that also signal the title of an unknown organization | ||
'faculty', | ||
'faction', | ||
'federation', | ||
@@ -155,2 +156,3 @@ 'financial', | ||
'police', | ||
'politburo', | ||
'polytechnic', | ||
@@ -173,2 +175,3 @@ 'post', | ||
'societe', | ||
'subsidiary', | ||
'society', | ||
@@ -175,0 +178,0 @@ 'sons', |
export default [ | ||
// #coolguy | ||
[/^#[\p{Number}_]*\p{Letter}/u, 'HashTag'],// can't be all numbers | ||
[/^#[\p{Number}_]*\p{Letter}/u, 'HashTag'], // can't be all numbers | ||
@@ -18,3 +18,3 @@ // @spencermountain | ||
// unicode character range | ||
[/^[\p{Emoji_Presentation}\p{Extended_Pictographic}]/u, 'Emoji', 'emoji-class'] | ||
[/^[\p{Emoji_Presentation}\p{Extended_Pictographic}]/u, 'Emoji', 'emoji-class'], | ||
] |
@@ -62,3 +62,3 @@ const anything = ['Noun', 'Verb', 'Adjective', 'Adverb', 'Value', 'QuestionWord'] | ||
Acronym: { | ||
not: ['Plural', 'RomanNumeral', 'Pronoun'], | ||
not: ['Plural', 'RomanNumeral', 'Pronoun', 'Date'], | ||
}, | ||
@@ -77,6 +77,6 @@ Negative: { | ||
Prefix: { | ||
not: ['Abbreviation', 'Acronym', 'ProperNoun'] | ||
not: ['Abbreviation', 'Acronym', 'ProperNoun'], | ||
}, | ||
// hard-nosed, bone-headed | ||
Hyphenated: {} | ||
Hyphenated: {}, | ||
} |
@@ -8,3 +8,3 @@ export default { | ||
is: 'Verb', | ||
not: ['PastTense'], | ||
not: ['PastTense', 'FutureTense'], | ||
}, | ||
@@ -29,4 +29,9 @@ // 'will [walk]' | ||
is: 'Verb', | ||
not: ['PresentTense', 'Gerund'], | ||
not: ['PresentTense', 'Gerund', 'FutureTense'], | ||
}, | ||
// will walk | ||
FutureTense: { | ||
is: 'Verb', | ||
not: ['PresentTense', 'PastTense'], | ||
}, | ||
// is/was | ||
@@ -33,0 +38,0 @@ Copula: { |
// Build the selection of person-name phrases for a document/view. | ||
// `doc` is expected to expose .match(); the returned value is whatever .splitAfter() yields. | ||
const find = function (doc) { | ||
// start from everything matching the pattern '#Honorific+? #Person+' | ||
let m = doc.match('#Honorific+? #Person+') | ||
// Spencer's King | ||
// collect #Possessive matches inside that selection, filtered with notIf('(his|her)') | ||
// NOTE(review): presumably this keeps phrases like 'her majesty ...' from being split - confirm against notIf semantics | ||
let poss = m.match('#Possessive').notIf('(his|her)') //her majesty ... | ||
// NOTE(review): presumably breaks each match right after a possessive, so "Spencer's" and "King" become separate results - confirm | ||
m = m.splitAfter(poss) | ||
return m | ||
} | ||
export default find |
@@ -8,3 +8,3 @@ import nlp from './one.js' | ||
nlp.plugin(preTag) //~103kb | ||
nlp.plugin(preTag) //~103kb | ||
nlp.plugin(contractionTwo) // | ||
@@ -11,0 +11,0 @@ nlp.plugin(postTag) //~33kb |
@@ -163,2 +163,4 @@ import type { Document, Pointer, Groups, JsonProps, outMethods, matchOptions, Term, Net } from '../misc.d.ts' | ||
sort: (method?: string | Function) => View | ||
/**cleanup various aspects of the words*/ | ||
normalize: (options?:object) => View | ||
@@ -165,0 +167,0 @@ // Whitespace |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
2500062
461
27971
1057