compromise
Advanced tools
Comparing version 11.14.3 to 12.0.0-rc1
@@ -9,2 +9,6 @@ compromise uses semver, and pushes to npm frequently | ||
### v12 | ||
- drop support for `regex` and `patterns` in plugins | ||
- camelCase() now removes punctuation between terms | ||
### v11 | ||
@@ -11,0 +15,0 @@ ##### 11.13.0 |
@@ -5,37 +5,40 @@ { | ||
"description": "natural language processing in the browser", | ||
"version": "11.14.3", | ||
"version": "12.0.0-rc1", | ||
"main": "./builds/compromise.js", | ||
"unpkg": "./builds/compromise.min.js", | ||
"types": "types", | ||
"module": "./builds/compromise.mjs", | ||
"types": "types/index.d.ts", | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/nlp-compromise/compromise.git" | ||
"url": "git://github.com/spencermountain/compromise.git" | ||
}, | ||
"scripts": { | ||
"test": "tape \"./test/unit/**/*.test.js\" | tap-dancer", | ||
"test:spec": "tape \"./test/unit/**/*.test.js\" | tap-spec", | ||
"testb": "TESTENV=prod tape \"./test/unit/**/*.test.js\" | tap-dancer", | ||
"buildTest": "TESTENV=prod node ./scripts/test.js", | ||
"test:types": "dtslint types", | ||
"browserTest": "node ./scripts/browserTest.js", | ||
"benchmark": "node ./scripts/benchmark.js", | ||
"build": "node ./scripts/build/index.js", | ||
"build": "npm run version && rollup -c && npm run filesize", | ||
"build:all": "node ./scripts/buildAll.js && npm run build", | ||
"pack": "node ./scripts/pack.js", | ||
"postpublish": "node ./scripts/postpublish", | ||
"version": "node ./scripts/version.js", | ||
"test": "node ./scripts/testAll.js", | ||
"testb": "TESTENV=prod node ./scripts/testAll.js", | ||
"testOne": "tape \"./tests/**/*.test.js\" | tap-dancer", | ||
"test:spec": "tape \"./tests/**/*.test.js\" | tap-spec", | ||
"filesize": "node ./scripts/filesize.js", | ||
"watch": "amble ./scratch.js", | ||
"filesize": "node ./scripts/lib/filesize.js", | ||
"coverage": "node ./scripts/postpublish/coverage.js", | ||
"lint": "node ./scripts/prepublish/linter.js" | ||
"stress": "node ./scripts/stress-test/stress.js", | ||
"plugins": "node ./scripts/plugin-check.js", | ||
"speed": "node ./scripts/stress-test/speed.js", | ||
"demo": "python -m SimpleHTTPServer 8888", | ||
"coverage": "nyc --reporter=html tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"test:types": "ts-node ./types/types.test.ts | tap-dancer", | ||
"lint": "eslint ./src/" | ||
}, | ||
"files": [ | ||
"builds/", | ||
"docs/", | ||
"types/index.d.ts" | ||
], | ||
"prettier": { | ||
"trailingComma": "none", | ||
"trailingComma": "es5", | ||
"tabWidth": 2, | ||
"semi": true, | ||
"semi": false, | ||
"singleQuote": true, | ||
"printWidth": 100 | ||
"printWidth": 120 | ||
}, | ||
@@ -46,20 +49,20 @@ "dependencies": { | ||
"devDependencies": { | ||
"@babel/core": "7.5.5", | ||
"@babel/preset-env": "7.5.5", | ||
"@babel/core": "7.6.4", | ||
"@babel/preset-env": "7.6.3", | ||
"amble": "0.0.7", | ||
"babelify": "10.0.0", | ||
"babili": "0.1.4", | ||
"browserify": "16.5.0", | ||
"chalk": "2.4.2", | ||
"codecov": "3.5.0", | ||
"compromise-plugin": "0.0.9", | ||
"derequire": "2.0.6", | ||
"dtslint": "0.9.3", | ||
"nyc": "14.1.1", | ||
"efrt": "2.2.1", | ||
"rollup": "1.26.3", | ||
"rollup-plugin-babel": "4.3.3", | ||
"rollup-plugin-commonjs": "10.1.0", | ||
"rollup-plugin-json": "4.0.0", | ||
"rollup-plugin-node-resolve": "5.2.0", | ||
"rollup-plugin-terser": "5.1.2", | ||
"shelljs": "0.8.3", | ||
"tap-dancer": "0.2.0", | ||
"tape": "4.11.0", | ||
"terser": "4.2.1" | ||
"tape": "4.11.0" | ||
}, | ||
"eslintIgnore": [ | ||
"builds/*.js" | ||
], | ||
"license": "MIT" | ||
} |
1028
README.md
<div align="center"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21955696/46e882d4-da3e-11e6-94a6-720c34e27df7.jpg" /> | ||
<div><b>compromise</b></div> | ||
<img src="https://user-images.githubusercontent.com/399657/68222691-6597f180-ffb9-11e9-8a32-a7f38aa8bded.png"/> | ||
<div>modest natural language processing</div> | ||
<div><code>npm install compromise</code></div> | ||
<div align="center"> | ||
<sub> | ||
by | ||
<a href="https://github.com/spencermountain">Spencer Kelly</a> and | ||
<a href="https://github.com/spencermountain/compromise/graphs/contributors"> | ||
many contributors | ||
</a> | ||
</sub> | ||
</div> | ||
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
</div> | ||
<div align="center"> | ||
<a href="https://npmjs.org/package/compromise"> | ||
<div> | ||
<a href="https://npmjs.org/package/compromise"> | ||
<img src="https://img.shields.io/npm/v/compromise.svg?style=flat-square" /> | ||
@@ -14,110 +29,169 @@ </a> | ||
</a> | ||
<div>modest natural-language processing in javascript</div> | ||
<sub> | ||
by | ||
<a href="https://github.com/spencermountain">Spencer Kelly</a> and | ||
<a href="https://github.com/spencermountain/compromise/graphs/contributors"> | ||
many contributors | ||
</a> | ||
</sub> | ||
</div> | ||
</div> | ||
<br/> | ||
<!-- small enough for the browser... --> | ||
<img src="https://user-images.githubusercontent.com/399657/35828705-828fd2ca-0a8e-11e8-9f12-88e840b8b399.png" /> | ||
<!-- spacer --> | ||
<img height="15px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
<!-- two gifs --> | ||
<table align="center"> | ||
<tr> | ||
<td> | ||
<a href="http://compromise.cool"> | ||
<img width="390" src="https://user-images.githubusercontent.com/399657/35871664-cdab2bca-0b32-11e8-8827-81de658216fa.gif" /> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="http://compromise.cool"> | ||
<img width="390" src="https://user-images.githubusercontent.com/399657/35871669-d05e8d26-0b32-11e8-99c6-0f8887ae40ea.gif" /> | ||
</a> | ||
</td> | ||
</tr> | ||
</table> | ||
<div align="left"> | ||
- <img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>compromise <a href="https://observablehq.com/@spencermountain/compromise-justification">tries its best</a>. | ||
</div> | ||
save yourself from **regex-whackamole**🤞: | ||
<div align="left"> | ||
<img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
it is | ||
<a href="https://docs.compromise.cool/compromise-filesize">small</a>, | ||
<a href="https://docs.compromise.cool/compromise-performance">quick</a>, | ||
and <a href="https://docs.compromise.cool/compromise-accuracy">usually good-enough</a>. | ||
</div> | ||
<!-- spacer --> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
### .match(): | ||
compromise makes it simple to interpret and match text: | ||
```js | ||
nlp(entireNovel).sentences().if('the #Adjective of times').out() | ||
let doc = nlp(entireNovel) | ||
doc.if('the #Adjective of times').text() | ||
// "it was the blurst of times??" | ||
``` | ||
move things around: | ||
```js | ||
nlp('she sells seashells by the seashore.').sentences().toFutureTense().out() | ||
// 'she will sell seashells...' | ||
if (doc.has('^simon says #Verb+')) { | ||
return doc.match('#Verb .*').text() //'fire the lazer ..' | ||
} | ||
``` | ||
respond to text input: | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/compromise-match">match docs</a> | ||
</div> | ||
<div align="center"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221837-0d142480-ffb8-11e9-9d30-90669f1b897c.png"/> | ||
</div> | ||
### .verbs(): | ||
conjugate and negate verbs in any tense: | ||
```js | ||
if( doc.has('^simon says (shoot|fire) #Determiner lazer') ){ | ||
fireLazer() | ||
} else { | ||
dontFire() | ||
} | ||
let doc = nlp('she sells seashells by the seashore.') | ||
doc.verbs().toPastTense() | ||
doc.text() | ||
// 'she sold seashells by the seashore.' | ||
``` | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/verbs">verb docs</a> | ||
</div> | ||
<div align="center"> | ||
compromise is not <a href="https://github.com/spencermountain/compromise/wiki/Justification">the cleverest</a>. | ||
<br/> | ||
but it is | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-filesize">small</a>, | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-performance">quick</a>, | ||
and <a href="https://beta.observablehq.com/@spencermountain/compromise-accuracy">good-enough</a> for a bunch of stuff. | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/> | ||
</div> | ||
---- | ||
### .nouns(): | ||
<!-- three-table section --> | ||
transform nouns to plural and possessive forms: | ||
```js | ||
let doc = nlp('the purple dinosaur') | ||
doc.nouns().toPlural() | ||
doc.text() | ||
// 'the purple dinosaurs' | ||
``` | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/nouns">noun docs</a> | ||
</div> | ||
<div align="center"> | ||
<table align="center"> | ||
<tr align="center"> | ||
<td align="center"> | ||
<b> | ||
&lt;script src&gt; | ||
</b> | ||
<div> | ||
<a href="https://github.com/spencermountain/compromise/wiki/QuickStart">one javascript file</a> | ||
</div> | ||
</td> | ||
<td align="center"> | ||
<b>🙏</b> | ||
<div> | ||
<kbd>npm install compromise</kbd> | ||
</div> | ||
</td> | ||
<td align="center"> | ||
<div> | ||
<b> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-accuracy"> | ||
86% | ||
</a> | ||
</b> | ||
<div> | ||
on the Penn treebank | ||
</div> | ||
</td> | ||
<td align="center"> | ||
<b>IE9+</b> | ||
<div> | ||
caniuse, youbetcha | ||
</div> | ||
</td> | ||
</tr> | ||
</table> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221731-e8b84800-ffb7-11e9-8453-6395e0e903fa.png"/> | ||
</div> | ||
<!-- Install section --> | ||
#### ⚡️ on the Client-side | ||
### .numbers(): | ||
interpret plaintext numbers: | ||
```js | ||
nlp.extend(require('compromise-numbers')) | ||
let doc = nlp('ninety five thousand and fifty two') | ||
doc.numbers().add(2) | ||
doc.text() | ||
// 'ninety five thousand and fifty four' | ||
``` | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/compromise-values">number docs</a> | ||
</div> | ||
<div align="center"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221814-05ed1680-ffb8-11e9-8b6b-c7528d163871.png"/> | ||
</div> | ||
### .topics(): | ||
grab subjects in a text: | ||
```js | ||
nlp.extend(require('compromise-entities')) | ||
let doc = nlp(buddyHolly) | ||
doc.people().if('mary').json() | ||
// [{text:'Mary Tyler Moore'}] | ||
doc = nlp(freshPrince) | ||
doc.places().first().text() | ||
// 'West Philadelphia' | ||
doc = nlp('the opera about richard nixon visiting china') | ||
doc.topics().json() | ||
// [ | ||
// { text: 'richard nixon' }, | ||
// { text: 'china' } | ||
// ] | ||
``` | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/topics-named-entity-recognition">topics docs</a> | ||
</div> | ||
<div align="center"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221632-b9094000-ffb7-11e9-99e0-b48edd6cdf8a.png"/> | ||
</div> | ||
### .contractions(): | ||
work with contracted and implicit words: | ||
```js | ||
let doc = nlp("we're not gonna take it, no we ain't gonna take it.") | ||
// match an implicit term | ||
doc.has('going') // true | ||
// transform | ||
doc.contractions().expand() | ||
doc.text() | ||
// 'we are not going to take it, no we are not going to take it.' | ||
``` | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/compromise-contractions">contraction docs</a> | ||
</div> | ||
<div align="center"> | ||
<img src="https://user-images.githubusercontent.com/399657/68221731-e8b84800-ffb7-11e9-8453-6395e0e903fa.png"/> | ||
<!-- spacer --> | ||
<img height="30" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
</div> | ||
Use it on the client-side: | ||
```html | ||
<script src="https://unpkg.com/compromise@latest/builds/compromise.min.js"></script> | ||
<script src="https://unpkg.com/compromise"></script> | ||
<script> | ||
var doc = nlp('dinosaur') | ||
var str = doc.nouns().toPlural().out('text') | ||
var str = doc.nouns().toPlural().text() | ||
console.log(str) | ||
@@ -128,325 +202,472 @@ // 'dinosaurs' | ||
#### 🌋 Server-side! | ||
```javascript | ||
var nlp = require('compromise') | ||
or as an es-module: | ||
```typescript | ||
import nlp from 'compromise' | ||
var doc = nlp('London is calling') | ||
doc.sentences().toNegative() | ||
doc.verbs().toNegative() | ||
// 'London is not calling' | ||
``` | ||
<!-- spacer --> | ||
<img height="30" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
compromise is **170kb** (minified): | ||
<div align="center"> | ||
Get the hang of things: | ||
<!-- filesize --> | ||
<a href="https://bundlephobia.com/result?p=compromise"> | ||
<img width="600" src="https://user-images.githubusercontent.com/399657/68234819-14dfc300-ffd0-11e9-8b30-cb8545707b29.png"/> | ||
</a> | ||
</div> | ||
<table align="center"> | ||
<tr> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/tutorial-1"> | ||
Tutorial #1 | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>Input → output</sub> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-tutorial-2"> | ||
Tutorial #2 | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>Match & transform</sub> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-making-a-bot"> | ||
Tutorial #3 | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>Making a bot</sub> | ||
</div> | ||
</td> | ||
</tr> | ||
</table> | ||
it's pretty fast. It can run on keypress: | ||
<div align="center"> | ||
<a href="https://observablehq.com/@spencermountain/compromise-performance"> | ||
<img width="600" src="https://user-images.githubusercontent.com/399657/68234798-0abdc480-ffd0-11e9-9ac5-8875d185a631.png"/> | ||
</a> | ||
</div> | ||
it works mainly by <a href="https://observablehq.com/@spencermountain/verbs">conjugating many forms</a> of a basic word list. | ||
The final lexicon is ~14,000 words: | ||
<div align="center"> | ||
Detailed docs: | ||
<img width="600" src="https://user-images.githubusercontent.com/399657/68234805-0d201e80-ffd0-11e9-8dc6-f7a600352555.png"/> | ||
</div> | ||
<table align="center"> | ||
<tr> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-api"> | ||
API | ||
</a> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-tags"> | ||
Full Tagset | ||
</a> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-plugins"> | ||
Plugins | ||
</a> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-output"> | ||
Outputs | ||
</a> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-match-syntax"> | ||
Match Syntax | ||
</a> | ||
</div> | ||
</td> | ||
</tr> | ||
</table> | ||
## Examples: | ||
you can read more about how it works, [here](https://observablehq.com/@spencermountain/compromise-internals). | ||
<table> | ||
<tr> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/nlp-compromise"> | ||
Part-of-Speech tagging | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>nouns! verbs! adjectives!</sub> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/topics-named-entity-recognition"> | ||
Named-entities | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>people, places, organizations</sub> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-values"> | ||
Number parsing | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>seven hundred and fifty == 750</sub> | ||
</div> | ||
</td> | ||
</tr> | ||
<tr> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/tutorial-1"> | ||
Grammar-match | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>like a regex for a sentence</sub> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/verbs"> | ||
Verb conjugation | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>all your base are belong</sub> | ||
</div> | ||
</td> | ||
<td> | ||
<div align="center"> | ||
<a href="https://beta.observablehq.com/@spencermountain/compromise-normalization"> | ||
Normalization | ||
</a> | ||
</div> | ||
<div align="center"> | ||
<sub>case, whitespace, contractions..</sub> | ||
</div> | ||
</td> | ||
</tr> | ||
</table> | ||
<!-- spacer --> | ||
<!-- <img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> --> | ||
<div align="center"> | ||
<img src="https://user-images.githubusercontent.com/399657/68221814-05ed1680-ffb8-11e9-8b6b-c7528d163871.png"/> | ||
</div> | ||
* <a href="https://beta.observablehq.com/@spencermountain/nouns"><b>Plural/singular:</b></a> - grab the noun-phrases, make em plural: | ||
### .extend(): | ||
set a custom interpretation of your own words: | ||
```js | ||
doc = nlp('a bottle of beer on the wall.') | ||
doc.nouns(0).toPlural() | ||
doc.out('text') | ||
//'The bottles of beer on the wall.' | ||
let myWords = { | ||
kermit: 'FirstName', | ||
fozzie: 'FirstName', | ||
} | ||
let doc = nlp(muppetText, myWords) | ||
``` | ||
* <a href="https://beta.observablehq.com/@spencermountain/compromise-values"><b>Number parsing:</b></a> - parse written-out numbers, and change their form: | ||
or make more changes with a [compromise-plugin](https://observablehq.com/@spencermountain/compromise-plugins). | ||
```js | ||
doc = nlp('ninety five thousand and fifty two') | ||
doc.values().toNumber().out() | ||
// '95052' | ||
const nlp = require('compromise') | ||
doc = nlp('the 23rd of December') | ||
doc.values().add(2).toText() | ||
doc.out('text') | ||
// 'the twenty fifth of December' | ||
``` | ||
nlp.extend((Doc, world) => { | ||
// add new tags | ||
world.addTags({ | ||
Character: { | ||
isA: 'Person', | ||
notA: 'Adjective', | ||
}, | ||
}) | ||
* <a href="https://beta.observablehq.com/@spencermountain/compromise-normalization"><b>Normalization:</b></a> - handle looseness & variety of random text: | ||
```js | ||
doc = nlp("the guest-singer's björk at seven thirty.").normalize().out('text') | ||
// 'The guest singer is Bjork at 7:30.' | ||
``` | ||
// add or change words in the lexicon | ||
world.addWords({ | ||
kermit: 'Character', | ||
gonzo: 'Character', | ||
}) | ||
* <a href="https://beta.observablehq.com/@spencermountain/verbs"><b>Tense:</b></a> - switch to/from conjugations of any verb | ||
```js | ||
let doc = nlp('she sells seashells by the seashore.') | ||
doc.sentences().toFutureTense().out('text') | ||
//'she will sell seashells...' | ||
// add methods to run after the tagger | ||
world.postProcess(doc => { | ||
doc.match('light the lights').tag('#Verb . #Plural') | ||
}) | ||
doc.verbs().conjugate() | ||
// [{ PastTense: 'sold', | ||
// Infinitive: 'sell', | ||
// Gerund: 'selling', ... | ||
// }] | ||
// add a whole new method | ||
Doc.prototype.kermitVoice = function() { | ||
this.sentences().prepend('well,') | ||
this.match('i [(am|was)]').prepend('um,') | ||
return this | ||
} | ||
}) | ||
``` | ||
* <a href="https://github.com/spencermountain/compromise/wiki/Contractions"><b> Contractions:</b></a> - grab, expand and contract: | ||
```js | ||
doc = nlp("we're not gonna take it, no we ain't gonna take it.") | ||
doc.has('going') // true | ||
doc.match('are not').length // == 2 | ||
doc.contractions().expand().out() | ||
//'we are not going to take it, no we are not going to take it' | ||
``` | ||
<div align="right"> | ||
<a href="https://docs.compromise.cool/compromise-plugins">.extend() docs</a> | ||
</div> | ||
<div align="center"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221848-11404200-ffb8-11e9-90cd-3adee8d8564f.png"/> | ||
</div> | ||
* <a href="https://beta.observablehq.com/@spencermountain/topics-named-entity-recognition"><b> Named-entities:</b></a> - get the people, places, organizations: | ||
```js | ||
doc = nlp('the opera about richard nixon visiting china') | ||
doc.topics().data() | ||
// [ | ||
// { text: 'richard nixon' }, | ||
// { text: 'china' } | ||
// ] | ||
``` | ||
### API: | ||
* <a href="https://github.com/spencermountain/compromise/wiki/Lexicon"><b>Custom lexicon:</b></a> - make it do what you'd like: | ||
```js | ||
var lexicon={ | ||
'boston': 'MusicalGroup' | ||
} | ||
doc = nlp('i heard Boston\'s set in Chicago', lexicon) | ||
##### Constructor | ||
//alternatively, fix it 'in-post': | ||
doc.match('heard #Possessive set').terms(1).tag('MusicalGroup') | ||
``` | ||
_(these methods are on the `nlp` object)_ | ||
* <a href="https://beta.observablehq.com/@spencermountain/compromise-output"><b> Handy outputs:</b></a> - get sensible data: | ||
```js | ||
doc = nlp('We like Roy! We like Roy!').sentences().out('array') | ||
// ['We like Roy!', 'We like Roy!'] | ||
- **[.tokenize()](https://observablehq.com/@spencermountain/compromise-tokenization)** - parse text without running POS-tagging | ||
- **[.extend()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - mix in a compromise-plugin | ||
- **[.clone()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - make a deep-copy of the library state | ||
- **[.load()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - re-generate a Doc object from .export() results | ||
- **[.verbose()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - log our decision-making for debugging | ||
- **[.version()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - current semver version of the library | ||
doc = nlp('Tony Hawk').out('html') | ||
/* | ||
<span> | ||
<span class="nl-Person nl-FirstName">Tony</span> | ||
<span> </span> | ||
<span class="nl-Person nl-LastName">Hawk</span> | ||
</span> | ||
*/ | ||
``` | ||
##### Utils | ||
<!-- plugins section --> | ||
* <a href="https://beta.observablehq.com/@spencermountain/compromise-plugins"><b> Plugins:</b></a> - allow adding vocabulary, fixing errors, and setting context quickly: | ||
```js | ||
var plugin = { | ||
tags:{ | ||
Character:{ | ||
isA: 'Noun' | ||
} | ||
}, | ||
words:{ | ||
itchy: 'Character', | ||
scratchy: 'Character' | ||
} | ||
} | ||
nlp.plugin(plugin) | ||
nlp(`Couldn't Itchy share his pie with Scratchy?`).debug() | ||
/* | ||
couldn't - #Modal, #Verb | ||
itchy - #Character, #Noun | ||
share - #Infinitive, #Verb | ||
... | ||
*/ | ||
``` | ||
- **[.all()](https://observablehq.com/@spencermountain/compromise-utils)** - return the whole original document ('zoom out') | ||
- **[.found](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - is this document empty? | ||
- **[.parent()](https://observablehq.com/@spencermountain/compromise-utils)** - return the previous result | ||
- **[.parents()](https://observablehq.com/@spencermountain/compromise-utils)** - return all of the previous results | ||
- **[.tagger()](https://observablehq.com/@spencermountain/compromise-tagger)** - (re-)run the part-of-speech tagger on this document | ||
- **[.wordCount()](https://observablehq.com/@spencermountain/compromise-utils)** - count the # of terms in the document | ||
- **[.length](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - count the # of characters in the document (string length) | ||
- **[.clone()](https://observablehq.com/@spencermountain/compromise-utils)** - deep-copy the document, so that no references remain | ||
- **[.cache({})](https://observablehq.com/@spencermountain/compromise-cache)** - freeze the current state of the document, for speed-purposes | ||
- **[.uncache()](https://observablehq.com/@spencermountain/compromise-cache)** - un-freezes the current state of the document, so it may be transformed | ||
<h3 align="center"> | ||
of course, there's <a href="https://beta.observablehq.com/@spencermountain/nlp-compromise">a lot more stuff</a>. | ||
</h3> | ||
<h4 align="center"> | ||
<b>Join in -</b> | ||
we're fun, using <b>semver</b>, and moving fast: | ||
</h4> | ||
##### Accessors | ||
<table> | ||
<tr align="center"> | ||
<td> | ||
<a href="https://twitter.com/nlp_compromise"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21956672/a30cf206-da53-11e6-8c6c-0995cf2aef62.jpg"/> | ||
<div> Twitter </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="https://gitter.im/nlpcompromise/community"> | ||
<img src="https://user-images.githubusercontent.com/399657/59966970-9c310c00-94f1-11e9-97fb-e5767b5f3f0e.png"/> | ||
<div> Gitter chat </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="https://stackoverflow.com/questions/tagged/nlp-compromise"> | ||
<img src="https://user-images.githubusercontent.com/399657/59967058-17df8880-94f3-11e9-8d4f-1423bdfd8508.png"/> | ||
<div> Stackoverflow </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="https://github.com/nlp-compromise/nlp_compromise/wiki/Projects"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/26513481/a755ac38-4239-11e7-960a-1c26d85ddc1c.png"/> | ||
<div> Projects </div> | ||
</a> | ||
</td> | ||
<td> | ||
<a href="https://github.com/spencermountain/compromise/wiki/Contributing"> | ||
<img src="https://cloud.githubusercontent.com/assets/399657/21956742/5985a89c-da55-11e6-87bc-4f0f1549d202.jpg"/> | ||
<div> Pull-requests </div> | ||
</a> | ||
</td> | ||
</tr> | ||
</table> | ||
- **[.first(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the first result(s) | ||
- **[.last(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the last result(s) | ||
- **[.slice(n,n)](https://observablehq.com/@spencermountain/compromise-accessors)** - grab a subset of the results | ||
- **[.eq(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the nth result | ||
- **[.firstTerm()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the first word in each match | ||
- **[.lastTerm()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the end word in each match | ||
- **[.termList()](https://observablehq.com/@spencermountain/compromise-accessors)** - return a flat list of all Term objects in match | ||
<div align="left"> | ||
<a href="https://www.youtube.com/watch?v=WuPVS2tCg8s"> | ||
<img width="300" src="http://img.youtube.com/vi/WuPVS2tCg8s/mqdefault.jpg"/> | ||
</a> | ||
<a href="https://www.youtube.com/watch?v=c_hmwFwvO0U"> | ||
<img width="300" src="https://user-images.githubusercontent.com/399657/27890263-88e1fd10-61bf-11e7-93f2-745167f88d58.png"/> | ||
</a> | ||
##### Match | ||
_(all match methods use the [match-syntax](https://docs.compromise.cool/compromise-match-syntax).)_ | ||
- **[.match('')](https://observablehq.com/@spencermountain/compromise-match)** - return a new Doc, with this one as a parent | ||
- **[.not('')](https://observablehq.com/@spencermountain/compromise-match)** - return all results except for this | ||
- **[.matchOne('')](https://observablehq.com/@spencermountain/compromise-match)** - return only the first match | ||
- **[.if('')](https://observablehq.com/@spencermountain/compromise-match)** - return each current phrase, only if it contains this match ('only') | ||
- **[.ifNo('')](https://observablehq.com/@spencermountain/compromise-match)** - Filter-out any current phrases that have this match ('notIf') | ||
- **[.has('')](https://observablehq.com/@spencermountain/compromise-match)** - Return a boolean if this match exists | ||
- **[.lookBehind('')](https://observablehq.com/@spencermountain/compromise-match)** - search through earlier terms, in the sentence | ||
- **[.lookAhead('')](https://observablehq.com/@spencermountain/compromise-match)** - search through following terms, in the sentence | ||
- **[.before('')](https://observablehq.com/@spencermountain/compromise-match)** - return all terms before a match, in each phrase | ||
- **[.after('')](https://observablehq.com/@spencermountain/compromise-match)** - return all terms after a match, in each phrase | ||
- **[.lookup([])](https://observablehq.com/@spencermountain/compromise-match)** - quick find for an array of string matches | ||
##### Case | ||
- **[.toLowerCase()](https://observablehq.com/@spencermountain/compromise-case)** - turn every letter of every term to lower-case | ||
- **[.toUpperCase()](https://observablehq.com/@spencermountain/compromise-case)** - turn every letter of every term to upper case | ||
- **[.toTitleCase()](https://observablehq.com/@spencermountain/compromise-case)** - upper-case the first letter of each term | ||
- **[.toCamelCase()](https://observablehq.com/@spencermountain/compromise-case)** - remove whitespace and title-case each term | ||
##### Whitespace | ||
- **[.pre('')](https://observablehq.com/@spencermountain/compromise-whitespace)** - add this punctuation or whitespace before each match | ||
- **[.post('')](https://observablehq.com/@spencermountain/compromise-whitespace)** - add this punctuation or whitespace after each match | ||
- **[.trim()](https://observablehq.com/@spencermountain/compromise-whitespace)** - remove start and end whitespace | ||
- **[.hyphenate()](https://observablehq.com/@spencermountain/compromise-whitespace)** - connect words with hyphen, and remove whitespace | ||
- **[.dehyphenate()](https://observablehq.com/@spencermountain/compromise-whitespace)** - remove hyphens between words, and set whitespace | ||
##### Tag | ||
- **[.tag('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Give all terms the given tag | ||
- **[.tagSafe('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Only apply tag to terms if it is consistent with current tags | ||
- **[.unTag('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Remove this tag from the given terms | ||
- **[.canBe('')](https://observablehq.com/@spencermountain/compromise-tagger)** - return only the terms that can be this tag | ||
##### Loops | ||
- **[.map(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - run each phrase through a function, and create a new document | ||
- **[.forEach(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - run a function on each phrase, as an individual document | ||
- **[.filter(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return only the phrases that return true | ||
- **[.find(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return a document with only the first phrase that matches | ||
- **[.some(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return true or false if there is one matching phrase | ||
- **[.random(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - sample a subset of the results | ||
##### Insert | ||
- **[.replace(match, replace)](https://observablehq.com/@spencermountain/compromise-insert)** - search and replace match with new content | ||
- **[.replaceWith(replace)](https://observablehq.com/@spencermountain/compromise-insert)** - substitute-in new text | ||
- **[.delete()](https://observablehq.com/@spencermountain/compromise-insert)** - fully remove these terms from the document | ||
- **[.append(str)](https://observablehq.com/@spencermountain/compromise-insert)** - add these new terms to the end (insertAfter) | ||
- **[.prepend(str)](https://observablehq.com/@spencermountain/compromise-insert)** - add these new terms to the front (insertBefore) | ||
- **[.concat()](https://observablehq.com/@spencermountain/compromise-insert)** - add these new things to the end | ||
##### Transform | ||
- **[.sort('method')](https://observablehq.com/@spencermountain/compromise-sorting)** - re-arrange the order of the matches (in place) | ||
- **[.reverse()](https://observablehq.com/@spencermountain/compromise-sorting)** - reverse the order of the matches, but not the words | ||
- **[.normalize({})](https://observablehq.com/@spencermountain/compromise-normalization)** - clean-up the text in various ways | ||
- **[.unique()](https://observablehq.com/@spencermountain/compromise-sorting)** - remove any duplicate matches | ||
- **[.split('')](https://observablehq.com/@spencermountain/compromise-split)** - return a Document with three parts for every match ('splitOn') | ||
- **[.splitBefore('')](https://observablehq.com/@spencermountain/compromise-split)** - partition a phrase before each matching segment | ||
- **[.splitAfter('')](https://observablehq.com/@spencermountain/compromise-split)** - partition a phrase after each matching segment | ||
- **[.segment({})](https://observablehq.com/@spencermountain/compromise-split)** - split a document into labeled sections | ||
- **[.join('')](https://observablehq.com/@spencermountain/compromise-split)** - make all phrases into one phrase | ||
##### Output | ||
- **[.text('method')](https://observablehq.com/@spencermountain/compromise-output)** - return the document as text | ||
- **[.json({})](https://observablehq.com/@spencermountain/compromise-json)** - pull out desired metadata from the document | ||
- **[.out('array|offset|terms')](https://observablehq.com/@spencermountain/compromise-output)** - some named output formats (deprecated) | ||
- **[.debug()](https://observablehq.com/@spencermountain/compromise-output)** - pretty-print the current document and its tags | ||
- **[.export()](https://observablehq.com/@spencermountain/compromise-export)** - store a parsed document for later use | ||
##### Selections | ||
- **[.terms()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up results by each individual term | ||
- **[.clauses()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up sentences into multi-term phrases | ||
- **[.hyphenated()](https://observablehq.com/@spencermountain/compromise-selections)** - all terms connected with a hyphen or dash like `'wash-out'` | ||
- **[.phoneNumbers()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'(939) 555-0113'` | ||
- **[.hashTags()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'#nlp'` | ||
- **[.emails()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'hi@compromise.cool'` | ||
- **[.emoticons()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `:)` | ||
- **[.emojis()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `💋` | ||
- **[.atMentions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'@nlp_compromise'` | ||
- **[.urls()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'compromise.cool'` | ||
- **[.adverbs()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'quickly'` | ||
- **[.pronouns()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'he'` | ||
- **[.conjunctions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'but'` | ||
- **[.prepositions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'of'` | ||
- **[.abbreviations()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'Mrs.'` | ||
##### Subsets | ||
- **[.contractions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like "didn't" | ||
- **[.parentheses()](https://observablehq.com/@spencermountain/compromise-selections)** - return anything inside (parentheses) | ||
- **[.possessives()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `"Spencer's"` | ||
- **[.quotations()](https://observablehq.com/@spencermountain/compromise-selections)** - return any terms inside quotation marks | ||
- **[.acronyms()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'FBI'` | ||
- **[.lists()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'eats, shoots, and leaves'` | ||
- **[.nouns()](https://observablehq.com/@spencermountain/nouns)** - return any subsequent terms tagged as a Noun | ||
- **[.nouns().json()](https://observablehq.com/@spencermountain/nouns)** - overloaded output with noun metadata | ||
- **[.nouns().toPlural()](https://observablehq.com/@spencermountain/nouns)** - `'football captain' → 'football captains'` | ||
- **[.nouns().toSingular()](https://observablehq.com/@spencermountain/nouns)** - `'turnovers' → 'turnover'` | ||
- **[.nouns().isPlural()](https://observablehq.com/@spencermountain/nouns)** - return only plural nouns | ||
- **[.nouns().isSingular()](https://observablehq.com/@spencermountain/nouns)** - return only singular nouns | ||
- **[.nouns().hasPlural()](https://observablehq.com/@spencermountain/nouns)** - return only nouns that _can be_ inflected as plural | ||
- **[.nouns().toPossessive()](https://observablehq.com/@spencermountain/nouns)** - add a `'s` to the end, in a safe manner. | ||
- **[.verbs()](https://observablehq.com/@spencermountain/verbs)** - return any subsequent terms tagged as a Verb | ||
- **[.verbs().json()](https://observablehq.com/@spencermountain/verbs)** - overloaded output with verb metadata | ||
- **[.verbs().conjugate()](https://observablehq.com/@spencermountain/verbs)** - return all forms of these verbs | ||
- **[.verbs().toPastTense()](https://observablehq.com/@spencermountain/verbs)** - `'will go' → 'went'` | ||
- **[.verbs().toPresentTense()](https://observablehq.com/@spencermountain/verbs)** - `'walked' → 'walks'` | ||
- **[.verbs().toFutureTense()](https://observablehq.com/@spencermountain/verbs)** - `'walked' → 'will walk'` | ||
- **[.verbs().toInfinitive()](https://observablehq.com/@spencermountain/verbs)** - `'walks' → 'walk'` | ||
- **[.verbs().toGerund()](https://observablehq.com/@spencermountain/verbs)** - `'walks' → 'walking'` | ||
- **[.verbs().toNegative()](https://observablehq.com/@spencermountain/verbs)** - `'went' → 'did not go'` | ||
- **[.verbs().toPositive()](https://observablehq.com/@spencermountain/verbs)** - `"didn't study" → 'studied'` | ||
- **[.verbs().isNegative()](https://observablehq.com/@spencermountain/verbs)** - return verbs with 'not' | ||
- **[.verbs().isPositive()](https://observablehq.com/@spencermountain/verbs)** - only verbs without 'not' | ||
- **[.verbs().isPlural()](https://observablehq.com/@spencermountain/verbs)** - return plural verbs like 'we walk' | ||
- **[.verbs().isSingular()](https://observablehq.com/@spencermountain/verbs)** - return singular verbs like 'spencer walks' | ||
- **[.verbs().adverbs()](https://observablehq.com/@spencermountain/verbs)** - return the adverbs describing this verb. | ||
<div align="center"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/> | ||
</div> | ||
<p></p> | ||
### Plugins: | ||
These are some helpful extensions: | ||
##### Adjectives | ||
`npm install compromise-adjectives` | ||
- **[.adjectives()](https://observablehq.com/@spencermountain/compromise-adjectives)** - like `quick` | ||
- **[.adjectives().json()](https://observablehq.com/@spencermountain/compromise-adjectives)** - overloaded output with adjective metadata | ||
- **[.adjectives().conjugate()](https://observablehq.com/@spencermountain/compromise-adjectives)** - return all conjugated forms of this adjective | ||
- **[.adjectives().toSuperlative()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quickest` | ||
- **[.adjectives().toComparative()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quicker` | ||
- **[.adjectives().toAdverb()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quickly` | ||
- **[.adjectives().toVerb()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quicken` | ||
- **[.adjectives().toNoun()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quickness` | ||
##### Dates | ||
`npm install compromise-dates` | ||
- **[.dates()](https://observablehq.com/@spencermountain/compromise-dates)** - find dates like `June 8th` or `03/03/18` | ||
- **[.dates().json()](https://observablehq.com/@spencermountain/compromise-dates)** - overloaded output with date metadata | ||
- **[.dates().format('')](https://observablehq.com/@spencermountain/compromise-dates)** - convert the dates to specific formats | ||
##### Topics | ||
`npm install compromise-topics` | ||
- **[.people()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - names like 'John F. Kennedy' | ||
- **[.places()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - like 'Paris, France' | ||
- **[.organizations()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - like 'Google, Inc' | ||
- **[.topics()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - `people()` + `places()` + `organizations()` | ||
##### Numbers | ||
`npm install compromise-numbers` | ||
- **[.numbers()](https://observablehq.com/@spencermountain/compromise-values)** - grab all written and numeric values | ||
- **[.numbers().json()](https://observablehq.com/@spencermountain/compromise-values)** - overloaded output with number metadata | ||
- **[.money()](https://observablehq.com/@spencermountain/compromise-values)** - things like `'$2.50'` | ||
- **[.fractions()](https://observablehq.com/@spencermountain/compromise-values)** - things like `1/3rd` | ||
- **[.numbers().toText()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `five` or `fifth` | ||
- **[.numbers().toNumber()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `5` or `5th` | ||
- **[.numbers().toOrdinal()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `fifth` or `5th` | ||
- **[.numbers().toCardinal()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `five` or `5` | ||
- **[.numbers().add(n)](https://observablehq.com/@spencermountain/compromise-values)** - increase number by n | ||
- **[.numbers().subtract(n)](https://observablehq.com/@spencermountain/compromise-values)** - decrease number by n | ||
- **[.numbers().increment()](https://observablehq.com/@spencermountain/compromise-values)** - increase number by 1 | ||
- **[.numbers().decrement()](https://observablehq.com/@spencermountain/compromise-values)** - decrease number by 1 | ||
- **[.numbers().isEqual(n)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers with this value | ||
- **[.numbers().greaterThan(min)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers bigger than n | ||
- **[.numbers().lessThan(max)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers smaller than n | ||
- **[.numbers().between(min, max)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers between min and max | ||
- **[.numbers().isOrdinal()](https://observablehq.com/@spencermountain/compromise-values)** - return only ordinal numbers | ||
- **[.numbers().isCardinal()](https://observablehq.com/@spencermountain/compromise-values)** - return only cardinal numbers | ||
- **[.numbers().toLocaleString()](https://observablehq.com/@spencermountain/compromise-values)** - add commas, or nicer formatting for numbers | ||
##### Ngrams | ||
`npm install compromise-ngrams` | ||
- **[.ngrams({})](https://observablehq.com/@spencermountain/compromise-ngram)** - list all repeating sub-phrases, by word-count | ||
- **[.unigrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams with one word | ||
- **[.bigrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams with two words | ||
- **[.trigrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams with three words | ||
- **[.startgrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams including the first term of a phrase | ||
- **[.endgrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams including the last term of a phrase | ||
- **[.edgegrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams including the first or last term of a phrase | ||
##### Output | ||
`npm install compromise-output` | ||
- **[.hash()](#)** - generate an md5 hash from the document+tags | ||
- **[.html({})](#)** - generate sanitized html from the document | ||
##### Paragraphs | ||
`npm install compromise-paragraphs` | ||
this plugin creates a wrapper around the default sentence objects. | ||
- **[.paragraphs()](#)** - return groups of sentences | ||
- **[.paragraphs().json()](#)** - output metadata for each paragraph | ||
- **[.paragraphs().sentences()](#)** - go back to a regular Doc object | ||
- **[.paragraphs().terms()](#)** - | ||
- **[.paragraphs().eq()](#)** - | ||
- **[.paragraphs().first()](#)** - | ||
- **[.paragraphs().last()](#)** - | ||
- **[.paragraphs().match()](#)** - | ||
- **[.paragraphs().not()](#)** - | ||
- **[.paragraphs().if()](#)** - | ||
- **[.paragraphs().ifNo()](#)** - | ||
- **[.paragraphs().has()](#)** - | ||
- **[.paragraphs().forEach()](#)** - | ||
- **[.paragraphs().map()](#)** - | ||
- **[.paragraphs().filter()](#)** - | ||
##### Sentences | ||
`npm install compromise-sentences` | ||
- **[.sentences()](#)** - return a sentence class with additional methods | ||
- **[.sentences().json()](#)** - overloaded output with sentence metadata | ||
- **[.sentences().subjects()](#)** - return the main noun of each sentence | ||
- **[.sentences().toPastTense()](#)** - | ||
- **[.sentences().toPresentTense()](#)** - | ||
- **[.sentences().toFutureTense()](#)** - | ||
- **[.sentences().toContinuous()](#)** - | ||
- **[.sentences().toNegative()](#)** - | ||
- **[.sentences().toPositive()](#)** - | ||
- **[.sentences().isPassive()](#)** - | ||
- **[.sentences().isQuestion()](#)** - return questions with a `?` | ||
- **[.sentences().isExclamation()](#)** - return sentences with a `!` | ||
- **[.sentences().isStatement()](#)** - return sentences without `?` or `!` | ||
- **[.sentences().prepend()](#)** - smarter prepend that repairs whitespace + titlecasing | ||
- **[.sentences().append()](#)** - smarter append that repairs sentence punctuation | ||
- **[.sentences().toExclamation()](#)** - | ||
- **[.sentences().toQuestion()](#)** - | ||
- **[.sentences().toStatement()](#)** - | ||
- | ||
##### Syllables | ||
`npm install compromise-syllables` | ||
- **[.syllables()](https://observablehq.com/@spencermountain/compromise-syllables)** - split each term by its typical pronunciation | ||
<!-- spacer --> | ||
<div > | ||
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
<hr/> | ||
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
</div> | ||
<!-- spacer --> | ||
<div > | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
</div> | ||
### Docs: | ||
##### Tutorials: | ||
- **[Tutorial #1](https://docs.compromise.cool/tutorial-1)** - Input → output | ||
- **[Tutorial #2](https://docs.compromise.cool/compromise-tutorial-2)** - Match & transform | ||
- **[Tutorial #3](https://docs.compromise.cool/compromise-making-a-bot)** - Making a chat-bot | ||
<!-- * **[Tutorial #4]()** - Making a plugin --> | ||
##### 3rd party: | ||
- **[Geocoding Social Conversations with NLP and JavaScript](http://compromise.cool)** - by Microsoft | ||
- **[Microservice Recipe](https://eventn.com/recipes/text-parsing-with-nlp-compromise)** - by Eventn | ||
- **[Building Text-Based Games](https://killalldefects.com/2019/09/24/building-text-based-games-with-compromise-nlp/)** - by Matt Eland | ||
- **[Fun with javascript in BigQuery](https://medium.com/@hoffa/new-in-bigquery-persistent-udfs-c9ea4100fd83#6e09)** - by Felipe Hoffa | ||
##### Talks: | ||
- **[Language as an Interface](https://www.youtube.com/watch?v=WuPVS2tCg8s)** - by Spencer Kelly | ||
- **[Coding Chat Bots](https://www.youtube.com/watch?v=c_hmwFwvO0U)** - by KahWee Teng | ||
<div align="center"> | ||
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/> | ||
</div> | ||
##### Some fun Applications: | ||
- **[Chat dialogue framework](http://superscriptjs.com/)** - by Rob Ellis | ||
- **[Automated Bechdel Test](https://github.com/guardian/bechdel-test)** - by The Guardian | ||
- **[Story generation framework](https://perchance.org/welcome)** - by Jose Phrocca | ||
- **[Tumblr blog of lists](https://leanstooneside.tumblr.com/)** - horse-ebooks-like lists - by Michael Paulukonis | ||
- **[Video Editing from Transcription](https://newtheory.io/)** - by New Theory | ||
- **[Browser extension Fact-checking](https://github.com/AlexanderKidd/FactoidL)** - by Alexander Kidd | ||
- **[Siri shortcut](https://routinehub.co/shortcut/3260)** - by Michael Byrns | ||
- **[Amazon skill](https://github.com/tajddin/voiceplay)** - by Tajddin Maghni | ||
- **[Tasking Slack-bot](https://github.com/kevinsuh/toki)** - by Kevin Suh | ||
<!-- spacer --> | ||
<div align="center"> | ||
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
<hr/> | ||
</div> | ||
#### Limitations: | ||
- **slash-support:** | ||
We currently split slashes up as different words, like we do for hyphens. so things like this don't work: | ||
<code>nlp('the koala eats/shoots/leaves').has('koala leaves') //false</code> | ||
- **inter-sentence match:** | ||
By default, sentences are the top-level abstraction. | ||
Inter-sentence, or multi-sentence matches aren't supported: | ||
<code>nlp("that's it. Back to Winnipeg!").has('it back')//false</code> | ||
- **nested match syntax:** | ||
the <s>danger</s> beauty of regex is that you can recurse indefinitely. | ||
Our match syntax is much weaker. Things like this are not <i>(yet)</i> possible: | ||
<code>doc.match('(modern (major|minor))? general')</code> | ||
complex matches must be achieved with successive **.match()** statements. | ||
- **dependency parsing:** | ||
Proper sentence transformation requires understanding the [syntax tree](https://en.wikipedia.org/wiki/Parse_tree) of a sentence, which we don't currently do. | ||
We should! Help wanted with this. | ||
##### FAQ | ||
<ul align="left"> | ||
@@ -458,10 +679,10 @@ <p> | ||
<ul> | ||
yeah! | ||
yeah it is! | ||
<br/> | ||
it wasn't built to compete with the stanford tagger, and may not fit every project. | ||
it wasn't built to compete with NLTK, and may not fit every project. | ||
<br/> | ||
string stuff is synchronous too, and parallelizing is weird. | ||
string processing is synchronous too, and parallelizing node processes is weird. | ||
<br/> | ||
See <a href="https://beta.observablehq.com/@spencermountain/compromise-performance">here</a> for information about speed & performance, and | ||
<a href="https://github.com/spencermountain/compromise/wiki/Justification">here></a> for project motivations | ||
See <a href="https://observablehq.com/@spencermountain/compromise-performance">here</a> for information about speed & performance, and | ||
<a href="https://observablehq.com/@spencermountain/compromise-justification">here</a> for project motivations | ||
</ul> | ||
@@ -478,3 +699,3 @@ <p></p> | ||
<br/> | ||
Read <a href="https://github.com/spencermountain/compromise/wiki/QuickStart">quickStart</a> for all sorts of funny environments. | ||
Read <a href="https://observablehq.com/@spencermountain/compromise-quickstart">quick start</a> for running compromise in workers, mobile apps, and all sorts of funny environments. | ||
</ul> | ||
@@ -486,9 +707,8 @@ <p></p> | ||
<details> | ||
<summary>🌎 Other Languages?</summary> | ||
<summary>🌎 Compromise in other Languages?</summary> | ||
<p></p> | ||
<ul> | ||
okay! <br/> | ||
we've got work-in-progress forks for <a href="https://github.com/nlp-compromise/de-compromise">German</a> and <a href="https://github.com/nlp-compromise/fr-compromise">French</a>, in the same philosophy. | ||
<br/> | ||
Get involved! | ||
and need some help. | ||
</ul> | ||
@@ -503,8 +723,7 @@ <p></p> | ||
<ul> | ||
compromise is one function so can't really be tree-shaken. | ||
<br/> .. and the tagging methods are competitive, so it's not recommended to pull things out. | ||
<br/> | ||
It's best to load the library fully, given it's smaller than <a href="https://68.media.tumblr.com/tumblr_m674jlpyPT1ry8fquo1_250.gif">this gif</a>. | ||
<br/> | ||
A plug-in scheme is in the works. | ||
compromise isn't easily tree-shaken. | ||
<br/> | ||
the tagging methods are competitive, and greedy, so it's not recommended to pull things out. | ||
<br/> | ||
It's recommended to run the library fully. | ||
</ul> | ||
@@ -516,23 +735,18 @@ <p></p> | ||
<hr/> | ||
<div align="center"> | ||
<img src="https://user-images.githubusercontent.com/399657/68221731-e8b84800-ffb7-11e9-8453-6395e0e903fa.png"/> | ||
</div> | ||
### Also: | ||
* **[naturalNode](https://github.com/NaturalNode/natural)** - fancier statistical nlp in javascript | ||
* **[superScript](http://superscriptjs.com/)** - clever conversation engine in js | ||
* **[nodeBox Linguistics](https://www.nodebox.net/code/index.php/Linguistics)** - conjugation, inflection in javascript | ||
* **[reText](https://github.com/wooorm/retext)** - very impressive [text utilities](https://github.com/wooorm/retext/blob/master/doc/plugins.md) in javascript | ||
* **[jsPos](https://code.google.com/archive/p/jspos/)** - javascript build of the time-tested Brill-tagger | ||
* **[spaCy](https://spacy.io/)** - speedy, multilingual tagger in C/python | ||
#### See Also: | ||
For the former promise-library, see [jnewman/compromise](https://github.com/jnewman/compromise) | ||
(Thanks [Joshua](https://github.com/jnewman)!) | ||
- **[naturalNode](https://github.com/NaturalNode/natural)** - fancier statistical nlp in javascript | ||
- **[superScript](http://superscriptjs.com/)** - clever conversation engine in js | ||
- **[nodeBox linguistics](https://www.nodebox.net/code/index.php/Linguistics)** - conjugation, inflection in javascript | ||
- **[reText](https://github.com/wooorm/retext)** - very impressive [text utilities](https://github.com/wooorm/retext/blob/master/doc/plugins.md) in javascript | ||
- **[jsPos](https://code.google.com/archive/p/jspos/)** - javascript build of the time-tested Brill-tagger | ||
- **[spaCy](https://spacy.io/)** - speedy, multilingual tagger in C/python | ||
<div align="right"> | ||
(and don't forget 🙇 | ||
<a href="http://www.nltk.org/">NLTK</a>, | ||
<a href="https://gate.ac.uk">GATE</a>, | ||
<a href="http://nlp.stanford.edu/software/lex-parser.shtml">Stanford</a>, | ||
and | ||
<a href="http://cogcomp.cs.illinois.edu/page/software/">Illinois</a> libs | ||
) | ||
</div> | ||
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
<b>MIT</b> | ||
<div align="right">by <a href="https://spencermounta.in/">spencermountain</a></div> |
@@ -1,189 +0,227 @@ | ||
declare function compromise(text: string, lexicon?: { [word: string]: string }): compromise.Text; | ||
declare namespace compromise { | ||
const version: string; | ||
function verbose(str: any): void; | ||
function tokenize(str: string): any; | ||
function plugin(obj: any): any; | ||
function clone(): any; | ||
function unpack(...args: any[]): void; | ||
function addWords(...args: any[]): void; | ||
function addTags(...args: any[]): void; | ||
function addRegex(...args: any[]): void; | ||
function addPatterns(...args: any[]): void; | ||
function addPlurals(...args: any[]): void; | ||
function addConjugations(conj: { [verb: string]: Conjugation }): void; | ||
export as namespace nlp | ||
interface Conjugation { | ||
Gerund?: string; | ||
PresentTense?: string; | ||
PastTense?: string; | ||
PerfectTense?: string; | ||
PluPerfectTense?: string; | ||
FuturePerfect?: string; | ||
Actor?: string; | ||
} | ||
declare function nlp(text: string): nlp.Document | ||
interface Text { | ||
/** did it find anything? */ | ||
readonly found: boolean; | ||
/** just a handy wrap */ | ||
readonly parent: Text; | ||
/** how many Texts are there? */ | ||
readonly length: number; | ||
/** nicer than constructor.call.name or whatever */ | ||
readonly isA: 'Text'; | ||
/** the whitespace before and after this match */ | ||
readonly whitespace: { | ||
before(str: string): Text, | ||
after(str: string): Text | ||
}; | ||
// Constructor | ||
declare module nlp { | ||
/** parse text into a compromise object, without running POS-tagging */ | ||
export function tokenize(text: string): Document | ||
/** mix in a compromise-plugin */ | ||
export function extend(plugin: any): Document | ||
/** make a deep-copy of the library state */ | ||
export function clone(): Document | ||
/** re-generate a Doc object from .json() results */ | ||
export function load(json: any): Document | ||
/** log our decision-making for debugging */ | ||
export function verbose(bool: boolean): Document | ||
/** current semver version of the library, as a string */ | ||
export const version: string | ||
acronyms(...args: any[]): any; | ||
adjectives(...args: any[]): any; | ||
adverbs(...args: any[]): any; | ||
contractions(...args: any[]): any; | ||
dates(...args: any[]): any; | ||
nouns(...args: any[]): any; | ||
people(...args: any[]): any; | ||
sentences(...args: any[]): any; | ||
terms(...args: any[]): any; | ||
possessives(...args: any[]): any; | ||
values(...args: any[]): any; | ||
verbs(...args: any[]): any; | ||
ngrams(...args: any[]): any; | ||
startGrams(...args: any[]): any; | ||
endGrams(...args: any[]): any; | ||
words(...args: any[]): any; | ||
class Document { | ||
// Utils | ||
/** return the whole original document ('zoom out') */ | ||
all(): Document | ||
/** did this match find anything? (false when the result is empty) */ | ||
found: Boolean | ||
/** return the previous result */ | ||
parent(): Document | ||
/** return all of the previous results */ | ||
parents(): Document[] | ||
/** (re)run the part-of-speech tagger on this document */ | ||
tagger(): Document | ||
/** count the # of terms in each match */ | ||
wordCount(): Number | ||
/** count the # of characters of each match */ | ||
length(): Number | ||
/** deep-copy the document, so that no references remain */ | ||
clone(shallow?: Boolean): Document | ||
/** freeze the current state of the document, for speed-purposes */ | ||
cache(options?: Object): Document | ||
/** un-freezes the current state of the document, so it may be transformed */ | ||
uncache(options?: Object): Document | ||
// misc | ||
all(...args: any[]): any; | ||
index(...args: any[]): any; | ||
wordCount(...args: any[]): any; | ||
data(...args: any[]): any; | ||
/* javascript array loop-wrappers */ | ||
map(...args: any[]): any; | ||
forEach(...args: any[]): any; | ||
filter(...args: any[]): any; | ||
reduce(...args: any[]): any; | ||
find(...args: any[]): any; | ||
/** copy data properly so later transformations will have no effect */ | ||
clone(...args: any[]): any; | ||
// Accessors | ||
/** use only the first result(s) */ | ||
first(n?: Number): Document | ||
/** use only the last result(s) */ | ||
last(n?: Number): Document | ||
/** grab a subset of the results */ | ||
slice(start: Number, end?: Number): Document | ||
/** use only the nth result */ | ||
eq(n: Number): Document | ||
/** get the first word in each match */ | ||
firstTerm(): Document | ||
/** get the end word in each match */ | ||
lastTerm(): Document | ||
/** return a flat list of all Term objects in match */ | ||
termList(): any | ||
/** get the nth term of each result */ | ||
term(...args: any[]): any; | ||
firstTerm(...args: any[]): any; | ||
lastTerm(...args: any[]): any; | ||
// Match | ||
/** return a new Doc, with this one as a parent */ | ||
match(match: String | Document): Document | ||
/** return all results except for this */ | ||
not(match: String | Document): Document | ||
/** return only the first match */ | ||
matchOne(match: String | Document): Document | ||
/** return each current phrase, only if it contains this match */ | ||
if(match: String | Document): Document | ||
/** Filter-out any current phrases that have this match */ | ||
ifNo(match: String | Document): Document | ||
/** Return a boolean: true if this match exists, false otherwise */ | ||
has(match: String | Document): boolean | ||
/** search through earlier terms, in the sentence */ | ||
lookBehind(match: String | Document): Document | ||
/** search through following terms, in the sentence */ | ||
lookAhead(match: String | Document): Document | ||
/** return the terms before each match */ | ||
before(match: String | Document): Document | ||
/** return the terms after each match */ | ||
after(match: String | Document): Document | ||
/** quick find for an array of string matches */ | ||
lookup(matches: String[]): Document | ||
/** grab a subset of the results */ | ||
slice(...args: any[]): any; | ||
// Case | ||
/** turn every letter of every term to lower-case */ | ||
toLowerCase(): Document | ||
/** turn every letter of every term to upper case */ | ||
toUpperCase(): Document | ||
/** upper-case the first letter of each term */ | ||
toTitleCase(): Document | ||
/** remove whitespace and title-case each term */ | ||
toCamelCase(): Document | ||
/** use only the nth result */ | ||
get(...args: any[]): any; | ||
/** use only the first result */ | ||
first(...args: any[]): any; | ||
/** use only the last result */ | ||
last(...args: any[]): any; | ||
// Whitespace | ||
/** add this punctuation or whitespace before each match */ | ||
pre(str: String): Document | ||
/** add this punctuation or whitespace after each match */ | ||
post(str: String): Document | ||
/** remove start and end whitespace */ | ||
trim(): Document | ||
/** connect words with hyphen, and remove whitespace */ | ||
hyphenate(): Document | ||
/** remove hyphens between words, and set whitespace */ | ||
dehyphenate(): Document | ||
concat(...args: any[]): any; | ||
// Tag | ||
/** Give all terms the given tag */ | ||
tag(tag: String, reason?: String): Document | ||
/** Only apply tag to terms if it is consistent with current tags */ | ||
tagSafe(tag: String, reason?: String): Document | ||
/** Remove this tag from the given terms */ | ||
unTag(tag: String, reason?: String): Document | ||
/** return only the terms that can be this tag */ | ||
canBe(tag: String): Document | ||
/** make it into one sentence/termlist */ | ||
flatten(...args: any[]): any; | ||
// Loops | ||
/** run each phrase through a function, and create a new document */ | ||
map(fn: Function): Document | [] | ||
/** run a function on each phrase, as an individual document */ | ||
forEach(fn: Function): Document | ||
/** return only the phrases that return true */ | ||
filter(fn: Function): Document | ||
/** return a document with only the first phrase that matches */ | ||
find(fn: Function): Document | undefined | ||
/** return true if there is a matching phrase, otherwise false */ | ||
some(fn: Function): boolean | ||
/** sample a subset of the results */ | ||
random(n?: Number): Document | ||
/** see if these terms can become this tag */ | ||
canBe(...args: any[]): any; | ||
// Insert | ||
/** substitute-in new content */ | ||
replaceWith(text: String, keepTags?: Boolean): Document | ||
/** search and replace match with new content */ | ||
replace(match: String, text?: String, keepTags?: Boolean): Document | ||
/** fully remove these terms from the document */ | ||
delete(match: String): Document | ||
/** add these new terms to the end (insertAfter) */ | ||
append(text: String): Document | ||
/** add these new terms to the front (insertBefore) */ | ||
prepend(text: String): Document | ||
/** add these new things to the end */ | ||
concat(text: String): Document | ||
/** sample part of the array */ | ||
random(...args: any[]): any; | ||
setPunctuation(...args: any[]): any; | ||
getPunctuation(...args: any[]): any; | ||
// jquery-like api aliases | ||
offset(...args: any[]): any; | ||
text(...args: any[]): any; | ||
eq(...args: any[]): any; | ||
join(...args: any[]): any; | ||
// transform | ||
/**re-arrange the order of the matches (in place) */ | ||
sort(method?: String | Function): Document | ||
/**reverse the order of the matches, but not the words */ | ||
reverse(): Document | ||
/** clean-up the document, in various ways, and return it for chaining (use .text() for string output) */ | ||
normalize(options?: String | Object): Document | ||
/** remove any duplicate matches */ | ||
unique(): Document | ||
/** return a Document with three parts for every match ('splitOn') */ | ||
split(match?: String): Document | ||
/** separate everything after the match as a new phrase */ | ||
splitBefore(match?: String): Document | ||
/** separate everything before the word, as a new phrase */ | ||
splitAfter(match?: String): Document | ||
/** split a document into labeled sections */ | ||
segment(regs: Object, options?: Object): Document | ||
/** make all phrases into one phrase */ | ||
join(str?: String): Document | ||
// loops | ||
toTitleCase(...args: any[]): any; | ||
toUpperCase(...args: any[]): any; | ||
toLowerCase(...args: any[]): any; | ||
toCamelCase(...args: any[]): any; | ||
// Output | ||
/** return the document as text */ | ||
text(options?: String | Object): String | ||
/** pull out desired metadata from the document */ | ||
json(options?: String | Object): any | ||
/** some named output formats */ | ||
out(format?: string): String | ||
/** pretty-print the current document and its tags */ | ||
debug(): Document | ||
/** store a parsed document for later use */ | ||
export(): any | ||
hyphenate(...args: any[]): any; | ||
dehyphenate(...args: any[]): any; | ||
trim(...args: any[]): any; | ||
// Selections | ||
/** split-up results by each individual term */ | ||
terms(n?: Number): Document | ||
/** split-up results into multi-term phrases */ | ||
clauses(n?: Number): Document | ||
/** return all terms connected with a hyphen or dash like `'wash-out'`*/ | ||
hyphenated(n?: Number): Document | ||
/** return things like `'(939) 555-0113'` */ | ||
phoneNumbers(n?: Number): Document | ||
/** return things like `'#nlp'` */ | ||
hashTags(n?: Number): Document | ||
/** return things like `'hi@compromise.cool'` */ | ||
emails(n?: Number): Document | ||
/** return things like `:)` */ | ||
emoticons(n?: Number): Document | ||
/** return things like `💋` */ | ||
emoji(n?: Number): Document | ||
/** return things like `'@nlp_compromise'`*/ | ||
atMentions(n?: Number): Document | ||
/** return things like `'compromise.cool'` */ | ||
urls(n?: Number): Document | ||
/** return things like `'quickly'` */ | ||
adverbs(n?: Number): Document | ||
/** return things like `'he'` */ | ||
pronouns(n?: Number): Document | ||
/** return things like `'but'`*/ | ||
conjunctions(n?: Number): Document | ||
/** return things like `'of'`*/ | ||
prepositions(n?: Number): Document | ||
/** return things like `'Mrs.'`*/ | ||
abbreviations(n?: Number): Document | ||
insertBefore(...args: any[]): any; | ||
insertAfter(...args: any[]): any; | ||
insertAt(...args: any[]): any; | ||
replace(...args: any[]): any; | ||
replaceWith(...args: any[]): any; | ||
delete(...args: any[]): any; | ||
lump(...args: any[]): any; | ||
tagger(...args: any[]): any; | ||
tag(...args: any[]): any; | ||
unTag(...args: any[]): any; | ||
// match | ||
/** do a regex-like search through terms and return a subset */ | ||
match(...args: any[]): any; | ||
not(...args: any[]): any; | ||
if(...args: any[]): any; | ||
ifNo(...args: any[]): any; | ||
has(...args: any[]): any; | ||
/** find a match and return everything in front of it */ | ||
before(...args: any[]): any; | ||
/** find a match and return everything after it */ | ||
after(...args: any[]): any; | ||
// alias 'and' | ||
and(...args: any[]): any; | ||
notIf(...args: any[]): any; | ||
only(...args: any[]): any; | ||
onlyIf(...args: any[]): any; | ||
// out | ||
out(...args: any[]): any; | ||
debug(...args: any[]): any; | ||
// sort | ||
/** reorder result.list alphabetically */ | ||
sort(...args: any[]): any; | ||
/** reverse the order of result.list */ | ||
reverse(...args: any[]): any; | ||
unique(...args: any[]): any; | ||
// split | ||
/** turn result into two separate results */ | ||
splitAfter(...args: any[]): any; | ||
/** turn result into two separate results */ | ||
splitBefore(...args: any[]): any; | ||
/** turn result into two separate results */ | ||
splitOn(...args: any[]): any; | ||
// normalize | ||
normalize(...args: any[]): any; | ||
// subsets | ||
clauses(...args: any[]): any; | ||
hashTags(...args: any[]): any; | ||
organizations(...args: any[]): any; | ||
phoneNumbers(...args: any[]): any; | ||
places(...args: any[]): any; | ||
quotations(...args: any[]): any; | ||
topics(...args: any[]): any; | ||
urls(...args: any[]): any; | ||
questions(...args: any[]): any; | ||
statements(...args: any[]): any; | ||
parentheses(...args: any[]): any; | ||
// Subsets | ||
/** return any multi-word terms, like "didn't" */ | ||
contractions(n?: Number): Document | ||
/** return anything inside (parentheses) */ | ||
parentheses(n?: Number): Document | ||
/** return things like "Spencer's" */ | ||
possessives(n?: Number): Document | ||
/** return any terms inside 'quotation marks' */ | ||
quotations(n?: Number): Document | ||
/** return things like `'FBI'` */ | ||
acronyms(n?: Number): Document | ||
/** return things like `'eats, shoots, and leaves'` */ | ||
lists(n?: Number): Document | ||
/** return any subsequent terms tagged as a Noun */ | ||
nouns(n?: Number): Document | ||
/** return any subsequent terms tagged as a Verb */ | ||
verbs(n?: Number): Document | ||
} | ||
} | ||
export = compromise; | ||
export default nlp |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
No v1
QualityPackage is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Dynamic require
Supply chain riskDynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Long strings
Supply chain riskContains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Minified code
QualityThis package contains minified code. This may be harmless in some cases where minified code is included in packaged libraries, however packages on npm should not minify code.
Found 1 instance in 1 package
1494661
13
2
745
0
9
20638
1