compromise
Advanced tools
Comparing version 7.0.4 to 7.0.5
@@ -5,3 +5,3 @@ { | ||
"description": "natural language processing in the browser", | ||
"version": "7.0.4", | ||
"version": "7.0.5", | ||
"main": "./builds/compromise.js", | ||
@@ -19,3 +19,4 @@ "repository": { | ||
"filesize": "node ./scripts/filesize.js", | ||
"coverage": "node ./scripts/coverage.js" | ||
"coverage": "node ./scripts/coverage.js", | ||
"prepublish":"npm run coverage" | ||
}, | ||
@@ -34,2 +35,3 @@ "files": [ | ||
"browserify": "13.0.1", | ||
"codacy-coverage": "^2.0.0", | ||
"derequire": "^2.0.3", | ||
@@ -39,13 +41,11 @@ "eslint": "^3.1.1", | ||
"http-server": "0.9.0", | ||
"jsdoc-parse": "^1.2.7", | ||
"leakage": "^0.2.0", | ||
"nlp-corpus": "latest", | ||
"nyc": "^8.4.0", | ||
"shelljs": "^0.7.2", | ||
"tap-min": "^1.1.0", | ||
"tap-spec": "4.1.1", | ||
"tape": "4.6.0", | ||
"uglify-js": "2.7.0", | ||
"uglifyify": "^3.0.3" | ||
"uglify-js": "2.7.0" | ||
}, | ||
"license": "MIT" | ||
} |
236
README.md
<div align="center"> | ||
<div>natural-language processing in the browser</div> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21955696/46e882d4-da3e-11e6-94a6-720c34e27df7.jpg" /> | ||
<div>natural language processing, actually in the browser</div> | ||
<a href="https://www.codacy.com/app/spencerkelly86/nlp_compromise"> | ||
@@ -23,10 +24,10 @@ <img src="https://api.codacy.com/project/badge/grade/82cc8ebd98b64ed199d7be6021488062" /> | ||
<div align="center"> | ||
<code>npm install compromise@next</code> | ||
<sub>(formerly nlp_compromise)</sub> | ||
</div> | ||
<div align="center"> | ||
<sub>(formerly nlp_compromise)</sub> | ||
<code>npm install compromise</code> | ||
</div> | ||
<br/> | ||
<div align="center"> | ||
inspect and play with english text | ||
inspect and play with english text. | ||
<div> | ||
@@ -37,50 +38,134 @@ focus on being <a href="https://github.com/nlp-compromise/compromise/wiki/Justification">handy, and not overly-fancy.</a> | ||
<br/> | ||
<div align="center"> | ||
💥Welcome to <b>v7</b>💥 | ||
<div> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/v7-upgrade-instructions">a lot</a> | ||
has changed! | ||
</div> | ||
</div> | ||
```javascript | ||
nlp('I look just like buddy holly').sentences().toPastTense().out('text') | ||
let r = nlp('I look just like buddy holly.') | ||
r.sentences().toPastTense() | ||
r.out('text') | ||
// "I looked just like buddy holly." | ||
``` | ||
<div align="center"> | ||
<table align="center"> | ||
<tr align="center"> | ||
<td align="center"> | ||
<b> | ||
<a href="https://unpkg.com/compromise@latest/builds/compromise.min.js"> | ||
200k | ||
</a> | ||
</b> | ||
<div> | ||
just a javascript file | ||
</div> | ||
</td> | ||
<td align="center"> | ||
<div> | ||
<b> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Accuracy"> | ||
86% | ||
</a> | ||
</b> | ||
<div> | ||
on the Penn treebank | ||
</div> | ||
</td> | ||
<td align="center"> | ||
<b>🙏</b> | ||
<div> | ||
<code>npm install compromise</code> | ||
</div> | ||
</td> | ||
<td align="center"> | ||
<b>IE9+</b> | ||
<div> | ||
caniuse, youbetcha | ||
</div> | ||
</td> | ||
</tr> | ||
</table> | ||
</div> | ||
<h3 align="center"> | ||
<a href="http://nlpcompromise.com">Demos</a> | ||
<a href="http://nlpcompromise.com">demos</a> | ||
<span> | </span> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Getting-Started">Quick-start</a> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Getting-Started">quickStart</a> | ||
<span> | </span> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/API">Docs</a> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/API">docs</a> | ||
<span> | </span> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Accuracy">Accuracy</a> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Accuracy">accuracy</a> | ||
</h3> | ||
<div align="center"> | ||
:boom:Welcome to <b>v7</b>:boom: | ||
<div> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/v7-upgrade-instructions">a lot</a> | ||
has changed! | ||
</div> | ||
<b>no training, configuration, or prolog</b> | ||
</div> | ||
<br/> | ||
<div align="left"> | ||
the idea is, | ||
<b> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Match-syntax"> | ||
reach-in | ||
</a> | ||
</b> to a part of the text, and change it: | ||
</div> | ||
```javascript | ||
r = nlp('john is really nice. sara sings loudly.') | ||
### Yup, | ||
* a [200k js file](https://unpkg.com/compromise@latest/builds/compromise.min.js) | ||
* **[86%](https://github.com/nlp-compromise/compromise/wiki/Accuracy)** on the **Penn treebank** | ||
* [keypress speed](https://github.com/nlp-compromise/compromise/wiki/Performance), constant-time. | ||
* caniuse, uhuh. **IE9+** | ||
* [no dependencies](https://github.com/nlp-compromise/compromise/wiki/Getting-Started), training, configuration, or prolog | ||
r.match('#Person').toUpperCase() | ||
//JOHN is really nice. SARA sings loudly. | ||
###Grammar, for the win: | ||
```javascript | ||
r = nlp('john is really nice. sara quickly walks.') | ||
//pluck-out some parts | ||
//or pluck-out some parts, | ||
r.remove('#Adverb') | ||
// "JOHN is nice. SARA sings." | ||
//reach-in and transform parts | ||
r.match('#Person').toTitleCase() | ||
//replacements, | ||
r.replace('is nice', 'is bad') | ||
// "JOHN is bad. SARA sings." | ||
r.plaintext() | ||
// 'John is nice. Sara walks.' | ||
//fancy! | ||
r.sentences().toNegative() | ||
// "JOHN is not bad. SARA doesn't sing." | ||
``` | ||
<div align="left"> | ||
grab those parts, and <b>analyze-the-heck</b> out of them: | ||
</div> | ||
```javascript | ||
r = nlp(chomskyFanFic) | ||
r.places().sort('freq').unique().data() | ||
/*[ | ||
{text: 'MIT lecture hall'}, | ||
{text: '23 Desperado dr.'}, | ||
{text: 'desert island'}, | ||
]*/ | ||
###Conjugation: | ||
r.questions().not('^but how .+').data() | ||
/* [] */ | ||
``` | ||
<div align="center"> | ||
🤗 🤗 🤗 🤗 🤗 🤗 🤗 🤗 🤗 🤗 | ||
</div> | ||
<table align="center"> | ||
<tr> | ||
<td>Part-of-Speech Tagging</td> | ||
<td>Named-Entity Resolution</td> | ||
<td>Verb Conjugation</td> | ||
<td>Inflection/Pluralization</td> | ||
</tr> | ||
</table> | ||
###Client-side: | ||
```html | ||
<script src="https://unpkg.com/compromise@latest/builds/compromise.min.js"></script> | ||
<script> | ||
var r = nlp('dinosaur').nouns().toPlural() | ||
console.log(r.out('text')) | ||
//dinosaurs | ||
</script> | ||
``` | ||
###Tense: | ||
```javascript | ||
r = nlp('she sells seashells by the seashore.').sentences().toFuture().text() | ||
let r = nlp('she sells seashells by the seashore.') | ||
r.sentences().toFutureTense().out('text') | ||
//'she will sell seashells...' | ||
@@ -99,3 +184,3 @@ | ||
r.nouns().first().toPlural() | ||
r.text() | ||
r.out('text') | ||
//'The bottles of beer on the wall.' | ||
@@ -115,12 +200,12 @@ ``` | ||
r.values().toCardinal().text() | ||
r.values().toCardinal().out('text') | ||
// 'five of december' | ||
r.values().toNumber().text() | ||
r.values().toNumber().out('text') | ||
// '5 of december' | ||
``` | ||
###Clever normalization: | ||
###Normalization: | ||
```javascript | ||
r = nlp("the guest-singer's björk at seven thirty.").normalize().text() | ||
r = nlp("the guest-singer's björk at seven thirty.").normalize().out('text') | ||
// 'The guest singer is Bjork at 7:30.' | ||
@@ -132,6 +217,6 @@ ``` | ||
r = nlp('the opera about richard nixon visiting china') | ||
r.match('(#Person|#Place|#Organization)').data() | ||
r.topics().data() | ||
// [ | ||
// { text: 'richard nixon', tags: ['Person'] }, | ||
// { text: 'china', tags: ['Place', 'Country'] } | ||
// { text: 'richard nixon' }, | ||
// { text: 'china' } | ||
// ] | ||
@@ -142,3 +227,3 @@ ``` | ||
```javascript | ||
r = nlp('Tony Hawk won').asHtml() | ||
r = nlp('Tony Hawk won').out('html') | ||
/* | ||
@@ -156,9 +241,43 @@ <span> | ||
###Join-in: | ||
<div align="center"> | ||
<b>Join in!</b> | ||
<div align="left"> | ||
we're fun, we're using <b>semver</b>, and moving fast. | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Contributing"> | ||
:hammer_and_wrench: get involved :dancer: </a> | ||
<i> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Contributing"> | ||
:hammer_and_wrench: get involved :dancer: | ||
</a> | ||
</i> | ||
</div> | ||
<table> | ||
<tr align="center"> | ||
<td> | ||
<a href="https://www.twitter.com/compromisejs"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21956672/a30cf206-da53-11e6-8c6c-0995cf2aef62.jpg"/> | ||
<div> Twitter </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="http://slack.compromise.cool/"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21956671/a30cbc82-da53-11e6-82d6-aaaaebc0bc93.jpg"/> | ||
<div> Slack group </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="http://nlpcompromise.us12.list-manage2.com/subscribe?u=d5bd9bcc36c4bef0fd5f6e75f&id=8738c1f5ef"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21956670/a30be6e0-da53-11e6-9aaf-52a10b8c3195.jpg"/> | ||
<div> Mailing-list </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="https://github.com/nlp-compromise/compromise/wiki/Contributing"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21956742/5985a89c-da55-11e6-87bc-4f0f1549d202.jpg"/> | ||
<div> Pull-requests </div> | ||
</a> | ||
</td> | ||
</tr> | ||
</table> | ||
<br/> | ||
<div align="center"> | ||
@@ -174,7 +293,5 @@ <a href="https://www.youtube.com/watch?v=tk_JGu2AbJY"> | ||
#See also | ||
For the former promise-library, see [jnewman/compromise](https://github.com/jnewman/compromise) | ||
(Thanks to the awesome [jnewman](https://github.com/jnewman) for the npm package!) | ||
* **[naturalNode](https://github.com/NaturalNode/natural)** - decidedly fancier, statistical nlp in javascript | ||
* **[SuperScript](http://superscriptjs.com/)** - clever conversation engine in javascript | ||
###Don't forget about: | ||
* **[naturalNode](https://github.com/NaturalNode/natural)** - decidedly fancier, statistical nlp in javascript, too | ||
* **[SuperScript](http://superscriptjs.com/)** - clever conversation engine in js | ||
* **[NodeBox Linguistics](https://www.nodebox.net/code/index.php/Linguistics)** - conjugation, inflection in javascript | ||
@@ -185,8 +302,15 @@ * **[reText](https://github.com/wooorm/retext)** - very impressive [text utilities](https://github.com/wooorm/retext/blob/master/doc/plugins.md) in javascript | ||
(don't forget | ||
[NLTK](http://www.nltk.org/), | ||
[GATE](https://gate.ac.uk), | ||
[Stanford](http://nlp.stanford.edu/software/lex-parser.shtml), | ||
For the former promise-library, see [jnewman/compromise](https://github.com/jnewman/compromise) | ||
(Thanks [Joshua](https://github.com/jnewman)!) | ||
<div align="right"> | ||
(also don't forget | ||
<a href="http://www.nltk.org/">NLTK</a>, | ||
<a href="https://gate.ac.uk">GATE</a>, | ||
<a href="http://nlp.stanford.edu/software/lex-parser.shtml">Stanford</a>, | ||
and | ||
[Illinois toolkit](http://cogcomp.cs.illinois.edu/page/software/) | ||
<a href="http://cogcomp.cs.illinois.edu/page/software/">Illinois toolkit</a> | ||
) | ||
❤️️ | ||
</div> | ||
[![Codacy Badge](https://api.codacy.com/project/badge/Coverage/82cc8ebd98b64ed199d7be6021488062)](https://www.codacy.com/app/spencerkelly86/nlp_compromise) |
@@ -43,3 +43,3 @@ //adjectives that either aren't covered by rules, or have superlative/comparative forms | ||
ed: 'advanc,belov,craz,determin,hallow,hook,inbr,justifi,nak,nuanc,sacr,subdu,unauthoriz,unrecogniz,wick', | ||
ly: 'dai,deep,earth,gris,heaven,low,meas,melancho,month,oi,prick,seem,s,ug,unru,week,wi,woman', | ||
ly: 'dai,earth,gris,heaven,low,meas,month,oi,prick,seem,s,ug,unru,week,wi,woman', | ||
al: 'actu,coloss,glob,illeg,leg,leth,liter,loy,ov,riv,roy,univers,usu', | ||
@@ -88,3 +88,2 @@ dy: 'baw,bloo,clou,gau,gid,han,mol,moo,stur,ti,tren,unti,unwiel', | ||
'average', | ||
'awake', | ||
'backwards', | ||
@@ -103,3 +102,2 @@ 'bad', | ||
'crisp', | ||
'deaf', | ||
'devout', | ||
@@ -134,3 +132,2 @@ 'difficult', | ||
'left', | ||
'less', | ||
'level', | ||
@@ -162,3 +159,2 @@ 'lewd', | ||
'ritzy', | ||
'rough', | ||
'savvy', | ||
@@ -165,0 +161,0 @@ 'sexy', |
@@ -7,2 +7,3 @@ 'use strict'; | ||
'lastnames': require('./people/lastnames'), | ||
'notable_people': require('./people/notable'), | ||
@@ -31,3 +32,4 @@ 'currencies': require('./values/currencies'), | ||
'adjectives': require('./adjectives/adjectives'), | ||
'superlatives': require('./adjectives/convertable'), | ||
'superlatives': require('./adjectives/superlatives'), | ||
'verbConverts': require('./adjectives/verbConverts'), | ||
@@ -34,0 +36,0 @@ 'irregular_verbs': require('./verbs/irregular_verbs'), |
'use strict'; | ||
//a lexicon is a giant object of known words | ||
//a lexicon is a giant object of known words and their assumed pos-tag. | ||
//the way we make it rn is a bit of a mess. | ||
const data = require('./index'); | ||
@@ -11,5 +12,8 @@ const fns = require('./fns'); | ||
toComparative: require('../term/adjective/toComparative'), | ||
toAdverb: require('../term/adjective/toAdverb') | ||
toAdverb: require('../term/adjective/toAdverb'), | ||
toVerb: require('../term/adjective/toVerb') | ||
}; | ||
const toAdjective = require('../term/verb/toAdjective'); | ||
// console.time('lexicon'); | ||
@@ -74,13 +78,5 @@ let lexicon = {}; | ||
//conjugate verblist | ||
const wantVerbs = [ | ||
'PastTense', | ||
'PresentTense', | ||
'Infinitive', | ||
'Gerund', | ||
'Actor', | ||
'Adjective' | ||
]; | ||
data.verbs.forEach((v) => { | ||
let o = fastConjugate(v); | ||
wantVerbs.forEach((k) => { | ||
Object.keys(o).forEach((k) => { | ||
if (o[k] && !lexicon[o[k]]) { | ||
@@ -90,2 +86,3 @@ lexicon[o[k]] = k; | ||
}); | ||
lexicon[toAdjective(v)] = 'Adjective'; | ||
}); | ||
@@ -101,2 +98,20 @@ | ||
//even more expressive adjectives | ||
data.verbConverts.forEach((a) => { | ||
lexicon[adj.toNoun(a)] = 'Noun'; | ||
lexicon[adj.toAdverb(a)] = 'Adverb'; | ||
lexicon[adj.toSuperlative(a)] = 'Superlative'; | ||
lexicon[adj.toComparative(a)] = 'Comparative'; | ||
const v = adj.toVerb(a); | ||
lexicon[v] = 'Verb'; | ||
//now conjugate it | ||
let o = fastConjugate(v); | ||
Object.keys(o).forEach((k) => { | ||
if (o[k] && !lexicon[o[k]]) { | ||
lexicon[o[k]] = k; | ||
} | ||
}); | ||
}); | ||
//inflect nouns | ||
@@ -110,4 +125,5 @@ data.nouns.forEach((n) => { | ||
//let a rip. | ||
addArr(data.verbs, 'Verb'); | ||
addObj(data.firstnames); | ||
addArr(data.notable_people.female, 'FemaleName'); | ||
addArr(data.notable_people.male, 'MaleName'); | ||
addArr(data.lastnames, 'LastName'); | ||
@@ -120,2 +136,3 @@ addArr(data.places.airports, 'Place'); | ||
addArr(data.adjectives, 'Adjective'); | ||
addArr(data.verbConverts, 'Adjective'); | ||
addArr(data.superlatives, 'Adjective'); | ||
@@ -133,3 +150,3 @@ addArr(data.currencies, 'Currency'); | ||
// console.log(lexicon['years']); | ||
// console.log(lexicon['will walk']); | ||
// let t = new Term('shake'); | ||
@@ -136,0 +153,0 @@ // t.tag.Verb = true; |
@@ -59,2 +59,20 @@ 'use strict'; | ||
Verb: [ | ||
'lengthen', | ||
'heighten', | ||
'worsen', | ||
'lessen', | ||
'awaken', | ||
'frighten', | ||
'threaten', | ||
'hasten', | ||
'strengthen', | ||
'given', | ||
//misc | ||
'known', | ||
'shown', | ||
'seen', | ||
'born' | ||
], | ||
Place: [ | ||
@@ -139,9 +157,2 @@ 'new england', | ||
'Verb': [ | ||
'given', | ||
'known', | ||
'shown', | ||
'seen', | ||
'born' | ||
], | ||
@@ -148,0 +159,0 @@ 'Gerund': [ |
@@ -31,2 +31,4 @@ //most nouns do not nead to be listed | ||
'god', | ||
'glacier', | ||
'canary', | ||
'grand slam', | ||
@@ -33,0 +35,0 @@ 'head start', |
@@ -40,3 +40,4 @@ | ||
'rosario', | ||
'lee' | ||
'lee', | ||
'mel' | ||
]; |
@@ -89,2 +89,3 @@ 'use strict'; | ||
'andrea', | ||
'annika', | ||
'beatriz', | ||
@@ -121,2 +122,3 @@ 'bettye', | ||
'jo', | ||
'jodie', | ||
'joni', | ||
@@ -158,2 +160,3 @@ 'kate', | ||
'rosalind', | ||
'shania', | ||
'sheryl', | ||
@@ -225,2 +228,1 @@ 'sue', | ||
module.exports = list; | ||
// console.log(list.indexOf('kelley')); |
@@ -44,11 +44,15 @@ 'use strict'; | ||
'phillip', | ||
'regis', | ||
'rex', | ||
'ricky', | ||
'shaun', | ||
'shaquille', | ||
'shawn', | ||
'steve', | ||
'timothy', | ||
'ty', | ||
'wilbur', | ||
'williams', | ||
'woodrow', | ||
'wolfgang', | ||
'youssef', | ||
@@ -65,2 +69,3 @@ 'mahmoud', | ||
'adama', | ||
'osama', | ||
'abdoulaye', | ||
@@ -67,0 +72,0 @@ 'modibo', |
@@ -7,6 +7,24 @@ 'use strict'; | ||
const Terms = require('./paths').Terms; | ||
const normalize = require('../term/normalize').normalize; | ||
//basically really dirty and stupid. | ||
const normalizeLex = function(lex) { | ||
lex = lex || {}; | ||
return Object.keys(lex).reduce((h, k) => { | ||
//add natural form | ||
h[k] = lex[k]; | ||
let normal = normalize(k); | ||
if (k !== normal) { | ||
//add it too | ||
h[normal] = lex[k]; | ||
} | ||
return h; | ||
}, {}); | ||
}; | ||
//build a new pos-tagged Result obj from a string | ||
const fromString = (str, lexicon) => { | ||
let sentences = tokenize(str); | ||
//make sure lexicon obeys standards | ||
lexicon = normalizeLex(lexicon); | ||
let list = sentences.map((s) => Terms.fromString(s, lexicon)); | ||
@@ -13,0 +31,0 @@ let r = new Text(list, lexicon); |
@@ -45,7 +45,7 @@ 'use strict'; | ||
//try known first-names | ||
if (this.firstName.match('#MalePerson').found) { | ||
if (this.firstName.match('#MaleName').found) { | ||
log.tell('known male name'); | ||
return 'Male'; | ||
} | ||
if (this.firstName.match('#FemalePerson').found) { | ||
if (this.firstName.match('#FemaleName').found) { | ||
log.tell('known female name'); | ||
@@ -52,0 +52,0 @@ return 'Female'; |
@@ -43,3 +43,3 @@ 'use strict'; | ||
r.match('(half|quarter) #Ordinal').tag('Value', 'half-ordinal'); | ||
r.match('#Value and #Value').tag('Value', 'value-and-value'); | ||
r.match('(hundred|thousand|million|billion|trillion) and #Value').tag('Value', 'magnitude-and-value'); | ||
r.match('#Value point #Value').tag('Value', 'value-point-value'); | ||
@@ -46,0 +46,0 @@ |
@@ -5,2 +5,3 @@ 'use strict'; | ||
const date_corrections = require('./date_corrections'); | ||
const person_corrections = require('./person_corrections'); | ||
@@ -46,18 +47,3 @@ // | ||
//people chunks | ||
//John L. Foo | ||
r.match('#FirstName #Acronym #TitleCase').tag('Person', 'firstname-acronym-titlecase'); | ||
//Mr Foo | ||
r.match('#Honorific #FirstName? #TitleCase').tag('Person', 'Honorific-TitleCase'); | ||
//John Foo | ||
r.match('#FirstName #TitleCase').match('#FirstName #Noun').tag('Person', 'firstname-titlecase'); | ||
//ludwig van beethovan | ||
r.match('#TitleCase (van|al) #TitleCase').tag('Person', 'correction-titlecase-van-titlecase'); | ||
r.match('#TitleCase (de|du) la? #TitleCase').tag('Person', 'correction-titlecase-van-titlecase'); | ||
//peter the great | ||
r.match('#FirstName the #Adjective').tag('Person', 'correction-determiner5'); | ||
//Morgan Shlkjsfne | ||
r.match('#Person #TitleCase').match('#TitleCase #Noun').tag('Person', 'correction-person-titlecase'); | ||
//organiation | ||
//organization | ||
r.match('#Organization (inc|bros|lmt|co|incorporation|corp|corporation)').tag('Organization', 'org-abbreviation'); | ||
@@ -105,10 +91,3 @@ | ||
//last names | ||
let reason = 'person-correction'; | ||
r.match('#FirstName #Acronym? #TitleCase').ifNo('#Date').tag('#Person', reason).lastTerm().tag('#LastName', reason); | ||
r.match('#FirstName (#Singular|#Possessive)').ifNo('#Date').tag('#Person', reason).lastTerm().tag('#LastName', reason); | ||
r.match('#FirstName #Acronym #Noun').ifNo('#Date').tag('#Person', reason).lastTerm().tag('#LastName', reason); | ||
r.match('(lady|queen) #TitleCase').ifNo('#Date').tag('#FemalePerson', reason); | ||
r.match('(king|pope) #TitleCase').ifNo('#Date').tag('#MalePerson', reason); | ||
r = person_corrections(r); | ||
r = date_corrections(r); | ||
@@ -115,0 +94,0 @@ |
@@ -40,5 +40,7 @@ 'use strict'; | ||
['Noun', 'VerbPhrase'], | ||
//roman numerals | ||
['RomanNumeral', 'Fraction', 'NiceNumber'], | ||
['RomanNumeral', 'Money'], | ||
//cases | ||
['UpperCase', 'TitleCase', 'CamelCase'] | ||
]; | ||
@@ -45,0 +47,0 @@ |
@@ -66,4 +66,6 @@ //the POS tags we use, according to their dependencies | ||
Ordinal: true, | ||
Cardinal: { | ||
RomanNumeral: true, | ||
}, | ||
Fraction: true, | ||
Cardinal: true, | ||
TextValue: true, | ||
@@ -70,0 +72,0 @@ NumericValue: true, |
'use strict'; | ||
const toAdverb = require('./toAdverb'); | ||
const toVerb = require('./toVerb'); | ||
const toNoun = require('./toNoun'); | ||
@@ -20,2 +21,5 @@ const toComparative = require('./toComparative'); | ||
}, | ||
verbForm: function() { | ||
return toVerb(this.normal); | ||
}, | ||
conjugate: function() { | ||
@@ -22,0 +26,0 @@ return { |
//turn 'quick' into 'quickly' | ||
'use strict'; | ||
const convertables = require('./paths').data.superlatives; | ||
const convertables = require('./convertable'); | ||
@@ -75,3 +75,3 @@ const irregulars = { | ||
if (convertables.indexOf(str) !== -1) { | ||
if (convertables[str] !== undefined) { | ||
if (str.match(/e$/)) { | ||
@@ -78,0 +78,0 @@ return str + 'r'; |
//turn 'quick' into 'quickest' | ||
'use strict'; | ||
const convertables = require('./paths').data.superlatives; | ||
const convertables = require('./convertable'); | ||
@@ -71,3 +71,3 @@ const irregulars = { | ||
if (convertables.indexOf(str) !== -1) { | ||
if (convertables.hasOwnProperty(str)) { | ||
return generic_transformation(str); | ||
@@ -74,0 +74,0 @@ } |
@@ -5,3 +5,3 @@ 'use strict'; | ||
const isMatch = require('./isMatch'); | ||
const addNormal = require('./normalize'); | ||
const addNormal = require('./normalize').addNormal; | ||
const addRoot = require('./root'); | ||
@@ -8,0 +8,0 @@ const fns = require('./paths').fns; |
'use strict'; | ||
const fixUnicode = require('./fixUnicode'); | ||
const normalize = function (term) { | ||
let str = term._text || ''; | ||
//some basic operations on a string to reduce noise | ||
exports.normalize = function(str) { | ||
str = str || ''; | ||
str = str.toLowerCase(); | ||
str = str.trim(); | ||
//(very) rough asci transliteration - bjŏrk -> bjork | ||
@@ -25,2 +27,8 @@ str = fixUnicode(str); | ||
} | ||
return str; | ||
}; | ||
exports.addNormal = function (term) { | ||
let str = term._text || ''; | ||
str = exports.normalize(str); | ||
//compact acronyms | ||
@@ -35,4 +43,3 @@ if (term.term.isAcronym()) { | ||
module.exports = normalize; | ||
// console.log(normalize('Dr. V Cooper')); |
@@ -10,6 +10,5 @@ 'use strict'; | ||
const want = [ | ||
'Gerund', | ||
'PastTense', | ||
'PresentTense', | ||
'FutureTense', | ||
'Gerund' | ||
]; | ||
@@ -44,2 +43,2 @@ | ||
module.exports = fasterConjugate; | ||
// console.log(fasterConjugate('play')); | ||
// console.log(fasterConjugate('walk')); |
@@ -33,3 +33,3 @@ 'use strict'; | ||
//check irregular forms | ||
const irregObj = checkIrregulars(all['Infinitive']); | ||
const irregObj = checkIrregulars(all['Infinitive']) || {}; | ||
Object.keys(irregObj).forEach((k) => { | ||
@@ -36,0 +36,0 @@ if (irregObj[k] && !all[k]) { |
@@ -21,8 +21,9 @@ 'use strict'; | ||
} | ||
//longer check of known-verb forms | ||
for(let i = 0; i < infArr.length; i++) { | ||
for(let o = 0; o < forms.length; o++) { | ||
let irObj = irregulars[infArr[i]]; | ||
if (irObj[forms[o]]) { | ||
if (irObj[forms[o]] === str) { | ||
let obj = Object.assign({}, irObj); | ||
obj.Infinitive = str; | ||
obj.Infinitive = infArr[i]; | ||
return obj; | ||
@@ -32,6 +33,6 @@ } | ||
} | ||
return null; | ||
return {}; | ||
}; | ||
module.exports = checkIrregulars; | ||
// console.log(checkIrregulars('understood')); | ||
// console.log(checkIrregulars('bit')); |
@@ -7,2 +7,3 @@ 'use strict'; | ||
const conjugate = require('./conjugate'); | ||
const toAdjective = require('./toAdjective'); | ||
let pluralMap = { | ||
@@ -64,2 +65,6 @@ 'is': 'are', | ||
asAdjective: function() { | ||
return toAdjective(this.normal); | ||
}, | ||
//mutable methods | ||
@@ -66,0 +71,0 @@ toPastTense: function () { |
@@ -6,2 +6,14 @@ 'use strict'; | ||
//not so smart (right now) | ||
const isRomanNumeral = function(t) { | ||
if (!t.term.canBe('RomanNumeral')) { | ||
return false; | ||
} | ||
const str = t.text; | ||
if (str.length > 1 && str.match(/^[IVXCM]+$/)) { | ||
return true; | ||
} | ||
return false; | ||
}; | ||
const oneLetters = { | ||
@@ -22,9 +34,5 @@ a: true, | ||
//anything can be titlecase | ||
if (str.match(/^[A-Z][a-z]/)) { | ||
if (str.match(/^[A-Z][a-z']/)) { | ||
t.tagAs('TitleCase', 'punct-rule'); | ||
} | ||
//don't over-write any other known tags | ||
if (Object.keys(t.tag).length > 0) { | ||
return; | ||
} | ||
//ok, normalise it a little, | ||
@@ -36,3 +44,6 @@ str = str.replace(/[,\.\?]$/, ''); | ||
if (str.match(r.reg)) { | ||
t.tagAs(r.tag, 'punctuation-rule- "' + r.str + '"'); | ||
//don't over-write any other known tags | ||
if (t.term.canBe(r.tag)) { | ||
t.tagAs(r.tag, 'punctuation-rule- "' + r.str + '"'); | ||
} | ||
return; | ||
@@ -45,2 +56,6 @@ } | ||
} | ||
//roman numerals (weak rn) | ||
if (isRomanNumeral(t)) { | ||
t.tagAs('RomanNumeral', 'is-roman-numeral'); | ||
} | ||
@@ -47,0 +62,0 @@ }); |
@@ -9,6 +9,2 @@ 'use strict'; | ||
s.terms.forEach((t) => { | ||
//don't over-write any known tags | ||
if (Object.keys(t.tag).length > 0) { | ||
return; | ||
} | ||
//do normalized rules (on t.normal) | ||
@@ -18,3 +14,6 @@ for (let o = 0; o < rules.length; o++) { | ||
if (t.normal.match(r.reg)) { | ||
t.tagAs(r.tag, 'word-rule- "' + r.str + '"'); | ||
//don't over-write any other known tags | ||
if (t.term.canBe(r.tag)) { | ||
t.tagAs(r.tag, 'word-rule- "' + r.str + '"'); | ||
} | ||
return; | ||
@@ -21,0 +20,0 @@ } |
@@ -6,2 +6,24 @@ 'use strict'; | ||
//tags that dont really count | ||
const nothing = { | ||
TitleCase: true, | ||
UpperCase: true, | ||
CamelCase: true | ||
}; | ||
//are the tags basically empty | ||
const gotNothing = function(t) { | ||
//fail-fast | ||
if (t.tag.Noun || t.tag.Verb || t.tag.Adjective) { | ||
return false; | ||
} | ||
let tags = Object.keys(t.tag); | ||
if (tags.length === 0) { | ||
return true; | ||
} | ||
if (tags.filter(tag => !nothing[tag]).length === 0) { | ||
return true; | ||
} | ||
return false; | ||
}; | ||
const noun_fallback = function(s) { | ||
@@ -16,4 +38,3 @@ log.here(path); | ||
//ensure it only has the tag 'Term' | ||
let tags = Object.keys(t.tag); | ||
if (tags.length === 0) { | ||
if (gotNothing(t)) { | ||
//ensure it's atleast word-looking | ||
@@ -20,0 +41,0 @@ if (t.term.isWord() === false) { |
@@ -36,7 +36,7 @@ //these are regexes applied to t.text, instead of t.normal | ||
//o'douggan | ||
['o\'[^aeiouy].*', 'LastName'], | ||
['o\'[drlkn].*', 'LastName'], | ||
].map(function (a) { | ||
return { | ||
reg: new RegExp('^' + a[0] + '$'), | ||
reg: new RegExp('^' + a[0] + '$', 'i'), | ||
tag: a[1], | ||
@@ -43,0 +43,0 @@ str: a[0] |
@@ -113,3 +113,3 @@ 'use strict'; | ||
['.[^aeiou]ful$', 'Adjective'], | ||
['.[^aeiou]ish$', 'Adjective'], | ||
['.[^aeiouf]ish$', 'Adjective'], | ||
['.[^aeiou]ica$', 'Singular'], | ||
@@ -124,3 +124,2 @@ ['[aeiou][^aeiou]is$', 'Singular'], | ||
['[aeiou][^aeiou]id$', 'Adjective'], | ||
['.[^aeiou]ish$', 'Adjective'], | ||
['.[^aeiou]ive$', 'Adjective'], | ||
@@ -137,4 +136,4 @@ ['[ea]{2}zy$', 'Adjective'], | ||
['[aeiou].*ist$', 'Adjective'], | ||
['(over|under)[a-z]{2,}$', 'Adjective'], | ||
['[^i]fer$', 'Infinitive'], | ||
['(bb|tt|gg|pp|ll)..?$', 'Verb'], //rubbed | ||
['[aeiou]c?ked$', 'PastTense'], //hooked | ||
@@ -141,0 +140,0 @@ ['(eastern|central|mountain|pacific)( standard)? time', 'Time'], //PST, eastern time. Todo:(only American right now) |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1012553
16
243
27935
308