Socket
Socket
Sign inDemoInstall

compromise

Package Overview
Dependencies
Maintainers
2
Versions
169
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

compromise - npm Package Compare versions

Comparing version 11.14.3 to 12.0.0-rc1

builds/compromise.js.map

4

changelog.md

@@ -9,2 +9,6 @@ compromise uses semver, and pushes to npm frequently

### v12
- drop support for `regex` and `patterns` in plugins
- camelCase() now removes punctuation between terms
### v11

@@ -11,0 +15,0 @@ ##### 11.13.0

67

package.json

@@ -5,37 +5,40 @@ {

"description": "natural language processing in the browser",
"version": "11.14.3",
"version": "12.0.0-rc1",
"main": "./builds/compromise.js",
"unpkg": "./builds/compromise.min.js",
"types": "types",
"module": "./builds/compromise.mjs",
"types": "types/index.d.ts",
"repository": {
"type": "git",
"url": "git://github.com/nlp-compromise/compromise.git"
"url": "git://github.com/spencermountain/compromise.git"
},
"scripts": {
"test": "tape \"./test/unit/**/*.test.js\" | tap-dancer",
"test:spec": "tape \"./test/unit/**/*.test.js\" | tap-spec",
"testb": "TESTENV=prod tape \"./test/unit/**/*.test.js\" | tap-dancer",
"buildTest": "TESTENV=prod node ./scripts/test.js",
"test:types": "dtslint types",
"browserTest": "node ./scripts/browserTest.js",
"benchmark": "node ./scripts/benchmark.js",
"build": "node ./scripts/build/index.js",
"build": "npm run version && rollup -c && npm run filesize",
"build:all": "node ./scripts/buildAll.js && npm run build",
"pack": "node ./scripts/pack.js",
"postpublish": "node ./scripts/postpublish",
"version": "node ./scripts/version.js",
"test": "node ./scripts/testAll.js",
"testb": "TESTENV=prod node ./scripts/testAll.js",
"testOne": "tape \"./tests/**/*.test.js\" | tap-dancer",
"test:spec": "tape \"./tests/**/*.test.js\" | tap-spec",
"filesize": "node ./scripts/filesize.js",
"watch": "amble ./scratch.js",
"filesize": "node ./scripts/lib/filesize.js",
"coverage": "node ./scripts/postpublish/coverage.js",
"lint": "node ./scripts/prepublish/linter.js"
"stress": "node ./scripts/stress-test/stress.js",
"plugins": "node ./scripts/plugin-check.js",
"speed": "node ./scripts/stress-test/speed.js",
"demo": "python -m SimpleHTTPServer 8888",
"coverage": "nyc --reporter=html tape \"./tests/**/*.test.js\" | tap-dancer --color always",
"test:types": "ts-node ./types/types.test.ts | tap-dancer",
"lint": "eslint ./src/"
},
"files": [
"builds/",
"docs/",
"types/index.d.ts"
],
"prettier": {
"trailingComma": "none",
"trailingComma": "es5",
"tabWidth": 2,
"semi": true,
"semi": false,
"singleQuote": true,
"printWidth": 100
"printWidth": 120
},

@@ -46,20 +49,20 @@ "dependencies": {

"devDependencies": {
"@babel/core": "7.5.5",
"@babel/preset-env": "7.5.5",
"@babel/core": "7.6.4",
"@babel/preset-env": "7.6.3",
"amble": "0.0.7",
"babelify": "10.0.0",
"babili": "0.1.4",
"browserify": "16.5.0",
"chalk": "2.4.2",
"codecov": "3.5.0",
"compromise-plugin": "0.0.9",
"derequire": "2.0.6",
"dtslint": "0.9.3",
"nyc": "14.1.1",
"efrt": "2.2.1",
"rollup": "1.26.3",
"rollup-plugin-babel": "4.3.3",
"rollup-plugin-commonjs": "10.1.0",
"rollup-plugin-json": "4.0.0",
"rollup-plugin-node-resolve": "5.2.0",
"rollup-plugin-terser": "5.1.2",
"shelljs": "0.8.3",
"tap-dancer": "0.2.0",
"tape": "4.11.0",
"terser": "4.2.1"
"tape": "4.11.0"
},
"eslintIgnore": [
"builds/*.js"
],
"license": "MIT"
}
<div align="center">
<img src="https://cloud.githubusercontent.com/assets/399657/21955696/46e882d4-da3e-11e6-94a6-720c34e27df7.jpg" />
<div><b>compromise</b></div>
<img src="https://user-images.githubusercontent.com/399657/68222691-6597f180-ffb9-11e9-8a32-a7f38aa8bded.png"/>
<div>modest natural language processing</div>
<div><code>npm install compromise</code></div>
<div align="center">
<sub>
by
<a href="https://github.com/spencermountain">Spencer Kelly</a> and
<a href="https://github.com/spencermountain/compromise/graphs/contributors">
many contributors
</a>
</sub>
</div>
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
</div>
<div align="center">
<a href="https://npmjs.org/package/compromise">
<div>
<a href="https://npmjs.org/package/compromise">
<img src="https://img.shields.io/npm/v/compromise.svg?style=flat-square" />

@@ -14,110 +29,169 @@ </a>

</a>
<div>modest natural-language processing in javascript</div>
<sub>
by
<a href="https://github.com/spencermountain">Spencer Kelly</a> and
<a href="https://github.com/spencermountain/compromise/graphs/contributors">
many contributors
</a>
</sub>
</div>
</div>
<br/>
<!-- small enough for the browser... -->
<img src="https://user-images.githubusercontent.com/399657/35828705-828fd2ca-0a8e-11e8-9f12-88e840b8b399.png" />
<!-- spacer -->
<img height="15px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
<!-- two gifs -->
<table align="center">
<tr>
<td>
<a href="http://compromise.cool">
<img width="390" src="https://user-images.githubusercontent.com/399657/35871664-cdab2bca-0b32-11e8-8827-81de658216fa.gif" />
</a>
</td>
<td>
<a href="http://compromise.cool">
<img width="390" src="https://user-images.githubusercontent.com/399657/35871669-d05e8d26-0b32-11e8-99c6-0f8887ae40ea.gif" />
</a>
</td>
</tr>
</table>
<div align="left">
- <img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>compromise <a href="https://observablehq.com/@spencermountain/compromise-justification">tries its best</a>.
</div>
save yourself from **regex-whackamole**🤞:
<div align="left">
<img height="30px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
it is
<a href="https://docs.compromise.cool/compromise-filesize">small</a>,
<a href="https://docs.compromise.cool/compromise-performance">quick</a>,
and <a href="https://docs.compromise.cool/compromise-accuracy">usually good-enough</a>.
</div>
<!-- spacer -->
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
### .match():
compromise makes it simple to interpret and match text:
```js
nlp(entireNovel).sentences().if('the #Adjective of times').out()
let doc = nlp(entireNovel)
doc.if('the #Adjective of times').text()
// "it was the blurst of times??"
```
move things around:
```js
nlp('she sells seashells by the seashore.').sentences().toFutureTense().out()
// 'she will sell seashells...'
if (doc.has('^simon says #Verb+')) {
return doc.match('#Verb .*').text() //'fire the lazer ..'
}
```
respond to text input:
<div align="right">
<a href="https://docs.compromise.cool/compromise-match">match docs</a>
</div>
<div align="center">
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221837-0d142480-ffb8-11e9-9d30-90669f1b897c.png"/>
</div>
### .verbs():
conjugate and negate verbs in any tense:
```js
if( doc.has('^simon says (shoot|fire) #Determiner lazer') ){
fireLazer()
} else {
dontFire()
}
let doc = nlp('she sells seashells by the seashore.')
doc.verbs().toPastTense()
doc.text()
// 'she sold seashells by the seashore.'
```
<div align="right">
<a href="https://docs.compromise.cool/verbs">verb docs</a>
</div>
<div align="center">
compromise is not <a href="https://github.com/spencermountain/compromise/wiki/Justification">the cleverest</a>.
<br/>
but it is
<a href="https://beta.observablehq.com/@spencermountain/compromise-filesize">small</a>,
<a href="https://beta.observablehq.com/@spencermountain/compromise-performance">quick</a>,
and <a href="https://beta.observablehq.com/@spencermountain/compromise-accuracy">good-enough</a> for a bunch of stuff.
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/>
</div>
----
### .nouns():
<!-- three-table section -->
transform nouns to plural and possessive forms:
```js
let doc = nlp('the purple dinosaur')
doc.nouns().toPlural()
doc.text()
// 'the purple dinosaurs'
```
<div align="right">
<a href="https://docs.compromise.cool/nouns">noun docs</a>
</div>
<div align="center">
<table align="center">
<tr align="center">
<td align="center">
<b>
&lt;script src&gt;
</b>
<div>
&nbsp; &nbsp; &nbsp; &nbsp; <a href="https://github.com/spencermountain/compromise/wiki/QuickStart">one javascript file</a> &nbsp; &nbsp; &nbsp; &nbsp;
</div>
</td>
<td align="center">
<b>🙏</b>
<div>
&nbsp; &nbsp; <kbd>npm install compromise</kbd> &nbsp; &nbsp;
</div>
</td>
<td align="center">
<div>
<b>
<a href="https://beta.observablehq.com/@spencermountain/compromise-accuracy">
86%
</a>
</b>
<div>
&nbsp; &nbsp; on the Penn treebank &nbsp; &nbsp;
</div>
</td>
<td align="center">
<b>IE9+</b>
<div>
&nbsp; &nbsp; &nbsp; caniuse, youbetcha &nbsp; &nbsp; &nbsp;
</div>
</td>
</tr>
</table>
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221731-e8b84800-ffb7-11e9-8453-6395e0e903fa.png"/>
</div>
<!-- Install section -->
#### ⚡️ on the Client-side
### .numbers():
interpret plaintext numbers
```js
nlp.extend(require('compromise-numbers'))
let doc = nlp('ninety five thousand and fifty two')
doc.numbers().add(2)
doc.text()
// 'ninety five thousand and fifty four'
```
<div align="right">
<a href="https://docs.compromise.cool/compromise-values">number docs</a>
</div>
<div align="center">
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221814-05ed1680-ffb8-11e9-8b6b-c7528d163871.png"/>
</div>
### .topics():
grab subjects in a text:
```js
nlp.extend(require('compromise-entities'))
let doc = nlp(buddyHolly)
doc.people().if('mary').json()
// [{text:'Mary Tyler Moore'}]
let doc = nlp(freshPrince)
doc.places().first().text()
// 'West Philadelphia'
doc = nlp('the opera about richard nixon visiting china')
doc.topics().json()
// [
// { text: 'richard nixon' },
// { text: 'china' }
// ]
```
<div align="right">
<a href="https://docs.compromise.cool/topics-named-entity-recognition">topics docs</a>
</div>
<div align="center">
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221632-b9094000-ffb7-11e9-99e0-b48edd6cdf8a.png"/>
</div>
### .contractions():
work with contracted and implicit words:
```js
let doc = nlp("we're not gonna take it, no we ain't gonna take it.")
// match an implicit term
doc.has('going') // true
// transform
doc.contractions().expand()
doc.text()
// 'we are not going to take it, no we are not going to take it.'
```
<div align="right">
<a href="https://docs.compromise.cool/compromise-contractions">contraction docs</a>
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/399657/68221731-e8b84800-ffb7-11e9-8453-6395e0e903fa.png"/>
<!-- spacer -->
<img height="30" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
</div>
Use it on the client-side:
```html
<script src="https://unpkg.com/compromise@latest/builds/compromise.min.js"></script>
<script src="https://unpkg.com/compromise"></script>
<script>
var doc = nlp('dinosaur')
var str = doc.nouns().toPlural().out('text')
var str = doc.nouns().toPlural().text()
console.log(str)

@@ -128,325 +202,472 @@ // 'dinosaurs'

#### 🌋 Server-side!
```javascript
var nlp = require('compromise')
or as an es-module:
```typescript
import nlp from 'compromise'
var doc = nlp('London is calling')
doc.sentences().toNegative()
doc.verbs().toNegative()
// 'London is not calling'
```
<!-- spacer -->
<img height="30" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
compromise is **170kb** (minified):
<div align="center">
Get the hang of things:
<!-- filesize -->
<a href="https://bundlephobia.com/result?p=compromise">
<img width="600" src="https://user-images.githubusercontent.com/399657/68234819-14dfc300-ffd0-11e9-8b30-cb8545707b29.png"/>
</a>
</div>
<table align="center">
<tr>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/tutorial-1">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Tutorial #1 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>Input → output</sub>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-tutorial-2">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Tutorial #2 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>Match & transform</sub>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-making-a-bot">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Tutorial #3 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>Making a bot</sub>
</div>
</td>
</tr>
</table>
it's pretty fast. It can run on keypress:
<div align="center">
<a href="https://observablehq.com/@spencermountain/compromise-performance">
<img width="600" src="https://user-images.githubusercontent.com/399657/68234798-0abdc480-ffd0-11e9-9ac5-8875d185a631.png"/>
</a>
</div>
it works mainly by <a href="https://observablehq.com/@spencermountain/verbs">conjugating many forms</a> of a basic word list.
The final lexicon is ~14,000 words:
<div align="center">
Detailed docs:
<img width="600" src="https://user-images.githubusercontent.com/399657/68234805-0d201e80-ffd0-11e9-8dc6-f7a600352555.png"/>
</div>
<table align="center">
<tr>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-api">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; API &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-tags">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Full Tagset &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-plugins">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Plugins &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-output">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Outputs &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-match-syntax">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Match Syntax &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
</td>
</tr>
</table>
## Examples:
you can read more about how it works, [here](https://observablehq.com/@spencermountain/compromise-internals).
<table>
<tr>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/nlp-compromise">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Part-of-Speech tagging &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>nouns! verbs! adjectives!</sub>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/topics-named-entity-recognition">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Named-entities &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>people, places, organizations</sub>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-values">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Number parsing &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>seven hundred and fifty == 750</sub>
</div>
</td>
</tr>
<tr>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/tutorial-1">
&nbsp; &nbsp; &nbsp; &nbsp; Grammar-match &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>like a regex for a sentence</sub>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/verbs">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Verb conjugation &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>all your base are belong</sub>
</div>
</td>
<td>
<div align="center">
<a href="https://beta.observablehq.com/@spencermountain/compromise-normalization">
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Normalization &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
</a>
</div>
<div align="center">
<sub>case, whitespace, contractions..</sub>
</div>
</td>
</tr>
</table>
<!-- spacer -->
<!-- <img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> -->
<div align="center">
<img src="https://user-images.githubusercontent.com/399657/68221814-05ed1680-ffb8-11e9-8b6b-c7528d163871.png"/>
</div>
* <a href="https://beta.observablehq.com/@spencermountain/nouns"><b>Plural/singular:</b></a> - grab the noun-phrases, make em plural:
### .extend():
set a custom interpretation of your own words:
```js
doc = nlp('a bottle of beer on the wall.')
doc.nouns(0).toPlural()
doc.out('text')
//'The bottles of beer on the wall.'
let myWords = {
kermit: 'FirstName',
fozzie: 'FirstName',
}
let doc = nlp(muppetText, myWords)
```
* <a href="https://beta.observablehq.com/@spencermountain/compromise-values"><b>Number parsing:</b></a> - parse written-out numbers, and change their form:
or make more changes with a [compromise-plugin](https://observablehq.com/@spencermountain/compromise-plugins).
```js
doc = nlp('ninety five thousand and fifty two')
doc.values().toNumber().out()
// '95052'
const nlp = require('compromise')
doc = nlp('the 23rd of December')
doc.values().add(2).toText()
doc.out('text')
// 'the twenty fifth of December'
```
nlp.extend((Doc, world) => {
// add new tags
world.addTags({
Character: {
isA: 'Person',
notA: 'Adjective',
},
})
* <a href="https://beta.observablehq.com/@spencermountain/compromise-normalization"><b>Normalization:</b></a> - handle looseness & variety of random text:
```js
doc = nlp("the guest-singer's björk at seven thirty.").normalize().out('text')
// 'The guest singer is Bjork at 7:30.'
```
// add or change words in the lexicon
world.addWords({
kermit: 'Character',
gonzo: 'Character',
})
* <a href="https://beta.observablehq.com/@spencermountain/verbs"><b>Tense:</b></a> - switch to/from conjugations of any verb
```js
let doc = nlp('she sells seashells by the seashore.')
doc.sentences().toFutureTense().out('text')
//'she will sell seashells...'
// add methods to run after the tagger
world.postProcess(doc => {
doc.match('light the lights').tag('#Verb . #Plural')
})
doc.verbs().conjugate()
// [{ PastTense: 'sold',
// Infinitive: 'sell',
// Gerund: 'selling', ...
// }]
// add a whole new method
Doc.prototype.kermitVoice = function() {
this.sentences().prepend('well,')
this.match('i [(am|was)]').prepend('um,')
return this
}
})
```
* <a href="https://github.com/spencermountain/compromise/wiki/Contractions"><b> Contractions:</b></a> - grab, expand and contract:
```js
doc = nlp("we're not gonna take it, no we ain't gonna take it.")
doc.has('going') // true
doc.match('are not').length // == 2
doc.contractions().expand().out()
//'we are not going to take it, no we are not going to take it'
```
<div align="right">
<a href="https://docs.compromise.cool/compromise-plugins">.extend() docs</a>
</div>
<div align="center">
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221848-11404200-ffb8-11e9-90cd-3adee8d8564f.png"/>
</div>
* <a href="https://beta.observablehq.com/@spencermountain/topics-named-entity-recognition"><b> Named-entities:</b></a> - get the people, places, organizations:
```js
doc = nlp('the opera about richard nixon visiting china')
doc.topics().data()
// [
// { text: 'richard nixon' },
// { text: 'china' }
// ]
```
### API:
* <a href="https://github.com/spencermountain/compromise/wiki/Lexicon"><b>Custom lexicon:</b></a> - make it do what you'd like:
```js
var lexicon={
'boston': 'MusicalGroup'
}
doc = nlp('i heard Boston\'s set in Chicago', lexicon)
##### Constructor
//alternatively, fix it 'in-post':
doc.match('heard #Possessive set').terms(1).tag('MusicalGroup')
```
_(these methods are on the `nlp` object)_
* <a href="https://beta.observablehq.com/@spencermountain/compromise-output"><b> Handy outputs:</b></a> - get sensible data:
```js
doc = nlp('We like Roy! We like Roy!').sentences().out('array')
// ['We like Roy!', 'We like Roy!']
- **[.tokenize()](https://observablehq.com/@spencermountain/compromise-tokenization)** - parse text without running POS-tagging
- **[.extend()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - mix in a compromise-plugin
- **[.clone()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - make a deep-copy of the library state
- **[.load()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - re-generate a Doc object from .export() results
- **[.verbose()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - log our decision-making for debugging
- **[.version()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - current semver version of the library
doc = nlp('Tony Hawk').out('html')
/*
<span>
<span class="nl-Person nl-FirstName">Tony</span>
<span>&nbsp;</span>
<span class="nl-Person nl-LastName">Hawk</span>
</span>
*/
```
##### Utils
<!-- plugins section -->
* <a href="https://beta.observablehq.com/@spencermountain/compromise-plugins"><b> Plugins:</b></a> - allow adding vocabulary, fixing errors, and setting context quickly:
```js
var plugin = {
tags:{
Character:{
isA: 'Noun'
}
},
words:{
itchy: 'Character',
scratchy: 'Character'
}
}
nlp.plugin(plugin)
nlp(`Couldn't Itchy share his pie with Scratchy?`).debug()
/*
couldn't - #Modal, #Verb
itchy - #Character, #Noun
share - #Infinitive, #Verb
...
*/
```
- **[.all()](https://observablehq.com/@spencermountain/compromise-utils)** - return the whole original document ('zoom out')
- **[.found](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - is this document empty?
- **[.parent()](https://observablehq.com/@spencermountain/compromise-utils)** - return the previous result
- **[.parents()](https://observablehq.com/@spencermountain/compromise-utils)** - return all of the previous results
- **[.tagger()](https://observablehq.com/@spencermountain/compromise-tagger)** - (re-)run the part-of-speech tagger on this document
- **[.wordCount()](https://observablehq.com/@spencermountain/compromise-utils)** - count the # of terms in the document
- **[.length](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - count the # of characters in the document (string length)
- **[.clone()](https://observablehq.com/@spencermountain/compromise-utils)** - deep-copy the document, so that no references remain
- **[.cache({})](https://observablehq.com/@spencermountain/compromise-cache)** - freeze the current state of the document, for speed-purposes
- **[.uncache()](https://observablehq.com/@spencermountain/compromise-cache)** - un-freezes the current state of the document, so it may be transformed
<h3 align="center">
of course, there's <a href="https://beta.observablehq.com/@spencermountain/nlp-compromise">a lot more stuff</a>.
</h3>
<h4 align="center">
<b>Join in -</b>
we're fun, using <b>semver</b>, and moving fast:
</h4>
##### Accessors
<table>
<tr align="center">
<td>
<a href="https://twitter.com/nlp_compromise">
<img src="https://cloud.githubusercontent.com/assets/399657/21956672/a30cf206-da53-11e6-8c6c-0995cf2aef62.jpg"/>
<div>&nbsp; &nbsp; &nbsp; &nbsp; Twitter &nbsp; &nbsp; &nbsp; &nbsp;</div>
</a>
</td>
<td>
<a href="https://gitter.im/nlpcompromise/community">
<img src="https://user-images.githubusercontent.com/399657/59966970-9c310c00-94f1-11e9-97fb-e5767b5f3f0e.png"/>
<div>&nbsp; &nbsp; &nbsp; Gitter chat &nbsp; &nbsp; &nbsp; </div>
</a>
</td>
<td>
<a href="https://stackoverflow.com/questions/tagged/nlp-compromise">
<img src="https://user-images.githubusercontent.com/399657/59967058-17df8880-94f3-11e9-8d4f-1423bdfd8508.png"/>
<div>&nbsp; &nbsp; &nbsp; Stackoverflow &nbsp; &nbsp; &nbsp; </div>
</a>
</td>
<td>
<a href="https://github.com/nlp-compromise/nlp_compromise/wiki/Projects">
<img src="https://cloud.githubusercontent.com/assets/399657/26513481/a755ac38-4239-11e7-960a-1c26d85ddc1c.png"/>
<div>&nbsp; &nbsp; &nbsp; &nbsp; Projects &nbsp; &nbsp; &nbsp; &nbsp; </div>
</a>
</td>
<td>
<a href="https://github.com/spencermountain/compromise/wiki/Contributing">
<img src="https://cloud.githubusercontent.com/assets/399657/21956742/5985a89c-da55-11e6-87bc-4f0f1549d202.jpg"/>
<div>&nbsp; &nbsp; &nbsp; Pull-requests &nbsp; &nbsp; &nbsp; </div>
</a>
</td>
</tr>
</table>
- **[.first(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the first result(s)
- **[.last(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the last result(s)
- **[.slice(n,n)](https://observablehq.com/@spencermountain/compromise-accessors)** - grab a subset of the results
- **[.eq(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the nth result
- **[.firstTerm()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the first word in each match
- **[.lastTerm()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the end word in each match
- **[.termList()](https://observablehq.com/@spencermountain/compromise-accessors)** - return a flat list of all Term objects in match
<div align="left">
<a href="https://www.youtube.com/watch?v=WuPVS2tCg8s">
<img width="300" src="http://img.youtube.com/vi/WuPVS2tCg8s/mqdefault.jpg"/>
</a>
<a href="https://www.youtube.com/watch?v=c_hmwFwvO0U">
<img width="300" src="https://user-images.githubusercontent.com/399657/27890263-88e1fd10-61bf-11e7-93f2-745167f88d58.png"/>
</a>
##### Match
_(all match methods use the [match-syntax](https://docs.compromise.cool/compromise-match-syntax).)_
- **[.match('')](https://observablehq.com/@spencermountain/compromise-match)** - return a new Doc, with this one as a parent
- **[.not('')](https://observablehq.com/@spencermountain/compromise-match)** - return all results except for this
- **[.matchOne('')](https://observablehq.com/@spencermountain/compromise-match)** - return only the first match
- **[.if('')](https://observablehq.com/@spencermountain/compromise-match)** - return each current phrase, only if it contains this match ('only')
- **[.ifNo('')](https://observablehq.com/@spencermountain/compromise-match)** - Filter-out any current phrases that have this match ('notIf')
- **[.has('')](https://observablehq.com/@spencermountain/compromise-match)** - Return a boolean if this match exists
- **[.lookBehind('')](https://observablehq.com/@spencermountain/compromise-match)** - search through earlier terms, in the sentence
- **[.lookAhead('')](https://observablehq.com/@spencermountain/compromise-match)** - search through following terms, in the sentence
- **[.before('')](https://observablehq.com/@spencermountain/compromise-match)** - return all terms before a match, in each phrase
- **[.after('')](https://observablehq.com/@spencermountain/compromise-match)** - return all terms after a match, in each phrase
- **[.lookup([])](https://observablehq.com/@spencermountain/compromise-match)** - quick find for an array of string matches
##### Case
- **[.toLowerCase()](https://observablehq.com/@spencermountain/compromise-case)** - turn every letter of every term to lower case
- **[.toUpperCase()](https://observablehq.com/@spencermountain/compromise-case)** - turn every letter of every term to upper case
- **[.toTitleCase()](https://observablehq.com/@spencermountain/compromise-case)** - upper-case the first letter of each term
- **[.toCamelCase()](https://observablehq.com/@spencermountain/compromise-case)** - remove whitespace and title-case each term
##### Whitespace
- **[.pre('')](https://observablehq.com/@spencermountain/compromise-whitespace)** - add this punctuation or whitespace before each match
- **[.post('')](https://observablehq.com/@spencermountain/compromise-whitespace)** - add this punctuation or whitespace after each match
- **[.trim()](https://observablehq.com/@spencermountain/compromise-whitespace)** - remove start and end whitespace
- **[.hyphenate()](https://observablehq.com/@spencermountain/compromise-whitespace)** - connect words with hyphen, and remove whitespace
- **[.dehyphenate()](https://observablehq.com/@spencermountain/compromise-whitespace)** - remove hyphens between words, and set whitespace
##### Tag
- **[.tag('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Give all terms the given tag
- **[.tagSafe('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Only apply tag to terms if it is consistent with current tags
- **[.unTag('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Remove this tag from the given terms
- **[.canBe('')](https://observablehq.com/@spencermountain/compromise-tagger)** - return only the terms that can be this tag
##### Loops
- **[.map(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - run each phrase through a function, and create a new document
- **[.forEach(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - run a function on each phrase, as an individual document
- **[.filter(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return only the phrases that return true
- **[.find(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return a document with only the first phrase that matches
- **[.some(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return true or false if there is one matching phrase
- **[.random(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - sample a subset of the results
##### Insert
- **[.replace(match, replace)](https://observablehq.com/@spencermountain/compromise-insert)** - search and replace match with new content
- **[.replaceWith(replace)](https://observablehq.com/@spencermountain/compromise-insert)** - substitute-in new text
- **[.delete()](https://observablehq.com/@spencermountain/compromise-insert)** - fully remove these terms from the document
- **[.append(str)](https://observablehq.com/@spencermountain/compromise-insert)** - add these new terms to the end (insertAfter)
- **[.prepend(str)](https://observablehq.com/@spencermountain/compromise-insert)** - add these new terms to the front (insertBefore)
- **[.concat()](https://observablehq.com/@spencermountain/compromise-insert)** - add these new things to the end
##### Transform
- **[.sort('method')](https://observablehq.com/@spencermountain/compromise-sorting)** - re-arrange the order of the matches (in place)
- **[.reverse()](https://observablehq.com/@spencermountain/compromise-sorting)** - reverse the order of the matches, but not the words
- **[.normalize({})](https://observablehq.com/@spencermountain/compromise-normalization)** - clean-up the text in various ways
- **[.unique()](https://observablehq.com/@spencermountain/compromise-sorting)** - remove any duplicate matches
- **[.split('')](https://observablehq.com/@spencermountain/compromise-split)** - return a Document with three parts for every match ('splitOn')
- **[.splitBefore('')](https://observablehq.com/@spencermountain/compromise-split)** - partition a phrase before each matching segment
- **[.splitAfter('')](https://observablehq.com/@spencermountain/compromise-split)** - partition a phrase after each matching segment
- **[.segment({})](https://observablehq.com/@spencermountain/compromise-split)** - split a document into labeled sections
- **[.join('')](https://observablehq.com/@spencermountain/compromise-split)** - make all phrases into one phrase
##### Output
- **[.text('method')](https://observablehq.com/@spencermountain/compromise-output)** - return the document as text
- **[.json({})](https://observablehq.com/@spencermountain/compromise-json)** - pull out desired metadata from the document
- **[.out('array|offset|terms')](https://observablehq.com/@spencermountain/compromise-output)** - some named output formats (deprecated)
- **[.debug()](https://observablehq.com/@spencermountain/compromise-output)** - pretty-print the current document and its tags
- **[.export()](https://observablehq.com/@spencermountain/compromise-export)** - store a parsed document for later use
##### Selections
- **[.terms()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up results by each individual term
- **[.clauses()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up sentences into multi-term phrases
- **[.hyphenated()](https://observablehq.com/@spencermountain/compromise-selections)** - all terms connected with a hyphen or dash like `'wash-out'`
- **[.phoneNumbers()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'(939) 555-0113'`
- **[.hashTags()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'#nlp'`
- **[.emails()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'hi@compromise.cool'`
- **[.emoticons()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `:)`
- **[.emojis()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `💋`
- **[.atMentions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'@nlp_compromise'`
- **[.urls()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'compromise.cool'`
- **[.adverbs()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'quickly'`
- **[.pronouns()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'he'`
- **[.conjunctions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'but'`
- **[.prepositions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'of'`
- **[.abbreviations()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'Mrs.'`
##### Subsets
- **[.contractions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like "didn't"
- **[.parentheses()](https://observablehq.com/@spencermountain/compromise-selections)** - return anything inside (parentheses)
- **[.possessives()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `"Spencer's"`
- **[.quotations()](https://observablehq.com/@spencermountain/compromise-selections)** - return any terms inside quotation marks
- **[.acronyms()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'FBI'`
- **[.lists()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'eats, shoots, and leaves'`
- **[.nouns()](https://observablehq.com/@spencermountain/nouns)** - return any subsequent terms tagged as a Noun
- **[.nouns().json()](https://observablehq.com/@spencermountain/nouns)** - overloaded output with noun metadata
- **[.nouns().toPlural()](https://observablehq.com/@spencermountain/nouns)** - `'football captain' → 'football captains'`
- **[.nouns().toSingular()](https://observablehq.com/@spencermountain/nouns)** - `'turnovers' → 'turnover'`
- **[.nouns().isPlural()](https://observablehq.com/@spencermountain/nouns)** - return only plural nouns
- **[.nouns().isSingular()](https://observablehq.com/@spencermountain/nouns)** - return only singular nouns
- **[.nouns().hasPlural()](https://observablehq.com/@spencermountain/nouns)** - return only nouns that _can be_ inflected as plural
- **[.nouns().toPossessive()](https://observablehq.com/@spencermountain/nouns)** - add a `'s` to the end, in a safe manner.
- **[.verbs()](https://observablehq.com/@spencermountain/verbs)** - return any subsequent terms tagged as a Verb
- **[.verbs().json()](https://observablehq.com/@spencermountain/verbs)** - overloaded output with verb metadata
- **[.verbs().conjugate()](https://observablehq.com/@spencermountain/verbs)** - return all forms of these verbs
- **[.verbs().toPastTense()](https://observablehq.com/@spencermountain/verbs)** - `'will go' → 'went'`
- **[.verbs().toPresentTense()](https://observablehq.com/@spencermountain/verbs)** - `'walked' → 'walks'`
- **[.verbs().toFutureTense()](https://observablehq.com/@spencermountain/verbs)** - `'walked' → 'will walk'`
- **[.verbs().toInfinitive()](https://observablehq.com/@spencermountain/verbs)** - `'walks' → 'walk'`
- **[.verbs().toGerund()](https://observablehq.com/@spencermountain/verbs)** - `'walks' → 'walking'`
- **[.verbs().toNegative()](https://observablehq.com/@spencermountain/verbs)** - `'went' → 'did not go'`
- **[.verbs().toPositive()](https://observablehq.com/@spencermountain/verbs)** - `"didn't study" → 'studied'`
- **[.verbs().isNegative()](https://observablehq.com/@spencermountain/verbs)** - return verbs with 'not'
- **[.verbs().isPositive()](https://observablehq.com/@spencermountain/verbs)** - only verbs without 'not'
- **[.verbs().isPlural()](https://observablehq.com/@spencermountain/verbs)** - return plural verbs like 'we walk'
- **[.verbs().isSingular()](https://observablehq.com/@spencermountain/verbs)** - return singular verbs like 'spencer walks'
- **[.verbs().adverbs()](https://observablehq.com/@spencermountain/verbs)** - return the adverbs describing this verb.
<div align="center">
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/>
</div>
<p></p>
### Plugins:
These are some helpful extensions:
##### Adjectives
`npm install compromise-adjectives`
- **[.adjectives()](https://observablehq.com/@spencermountain/compromise-adjectives)** - like `quick`
- **[.adjectives().json()](https://observablehq.com/@spencermountain/compromise-adjectives)** - overloaded output with adjective metadata
- **[.adjectives().conjugate()](https://observablehq.com/@spencermountain/compromise-adjectives)** - return all conjugated forms of this adjective
- **[.adjectives().toSuperlative()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quickest`
- **[.adjectives().toComparative()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quicker`
- **[.adjectives().toAdverb()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quickly`
- **[.adjectives().toVerb()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quicken`
- **[.adjectives().toNoun()](https://observablehq.com/@spencermountain/compromise-adjectives)** - convert `quick` to `quickness`
##### Dates
`npm install compromise-dates`
- **[.dates()](https://observablehq.com/@spencermountain/compromise-dates)** - find dates like `June 8th` or `03/03/18`
- **[.dates().json()](https://observablehq.com/@spencermountain/compromise-dates)** - overloaded output with date metadata
- **[.dates().format('')](https://observablehq.com/@spencermountain/compromise-dates)** - convert the dates to specific formats
##### Topics
`npm install compromise-topics`
- **[.people()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - names like 'John F. Kennedy'
- **[.places()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - like 'Paris, France'
- **[.organizations()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - like 'Google, Inc'
- **[.topics()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - `people()` + `places()` + `organizations()`
##### Numbers
`npm install compromise-numbers`
- **[.numbers()](https://observablehq.com/@spencermountain/compromise-values)** - grab all written and numeric values
- **[.numbers().json()](https://observablehq.com/@spencermountain/compromise-values)** - overloaded output with number metadata
- **[.money()](https://observablehq.com/@spencermountain/compromise-values)** - things like `'$2.50'`
- **[.fractions()](https://observablehq.com/@spencermountain/compromise-values)** - things like `1/3rd`
- **[.numbers().toText()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `five` or `fifth`
- **[.numbers().toNumber()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `5` or `5th`
- **[.numbers().toOrdinal()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `fifth` or `5th`
- **[.numbers().toCardinal()](https://observablehq.com/@spencermountain/compromise-values)** - convert number to `five` or `5`
- **[.numbers().add(n)](https://observablehq.com/@spencermountain/compromise-values)** - increase number by n
- **[.numbers().subtract(n)](https://observablehq.com/@spencermountain/compromise-values)** - decrease number by n
- **[.numbers().increment()](https://observablehq.com/@spencermountain/compromise-values)** - increase number by 1
- **[.numbers().decrement()](https://observablehq.com/@spencermountain/compromise-values)** - decrease number by 1
- **[.numbers().isEqual(n)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers with this value
- **[.numbers().greaterThan(min)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers bigger than n
- **[.numbers().lessThan(max)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers smaller than n
- **[.numbers().between(min, max)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers between min and max
- **[.numbers().isOrdinal()](https://observablehq.com/@spencermountain/compromise-values)** - return only ordinal numbers
- **[.numbers().isCardinal()](https://observablehq.com/@spencermountain/compromise-values)** - return only cardinal numbers
- **[.numbers().toLocaleString()](https://observablehq.com/@spencermountain/compromise-values)** - add commas, or nicer formatting for numbers
##### Ngrams
`npm install compromise-ngrams`
- **[.ngrams({})](https://observablehq.com/@spencermountain/compromise-ngram)** - list all repeating sub-phrases, by word-count
- **[.unigrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams with one word
- **[.bigrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams with two words
- **[.trigrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams with three words
- **[.startgrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams including the first term of a phrase
- **[.endgrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams including the last term of a phrase
- **[.edgegrams()](https://observablehq.com/@spencermountain/compromise-ngram)** - n-grams including the first or last term of a phrase
##### Output
`npm install compromise-output`
- **[.hash()](#)** - generate an md5 hash from the document+tags
- **[.html({})](#)** - generate sanitized html from the document
##### Paragraphs
`npm install compromise-paragraphs`
this plugin creates a wrapper around the default sentence objects.
- **[.paragraphs()](#)** - return groups of sentences
- **[.paragraphs().json()](#)** - output metadata for each paragraph
- **[.paragraphs().sentences()](#)** - go back to a regular Doc object
- **[.paragraphs().terms()](#)** -
- **[.paragraphs().eq()](#)** -
- **[.paragraphs().first()](#)** -
- **[.paragraphs().last()](#)** -
- **[.paragraphs().match()](#)** -
- **[.paragraphs().not()](#)** -
- **[.paragraphs().if()](#)** -
- **[.paragraphs().ifNo()](#)** -
- **[.paragraphs().has()](#)** -
- **[.paragraphs().forEach()](#)** -
- **[.paragraphs().map()](#)** -
- **[.paragraphs().filter()](#)** -
##### Sentences
`npm install compromise-sentences`
- **[.sentences()](#)** - return a sentence class with additional methods
- **[.sentences().json()](#)** - overloaded output with sentence metadata
- **[.sentences().subjects()](#)** - return the main noun of each sentence
- **[.sentences().toPastTense()](#)** -
- **[.sentences().toPresentTense()](#)** -
- **[.sentences().toFutureTense()](#)** -
- **[.sentences().toContinuous()](#)** -
- **[.sentences().toNegative()](#)** -
- **[.sentences().toPositive()](#)** -
- **[.sentences().isPassive()](#)** -
- **[.sentences().isQuestion()](#)** - return questions with a `?`
- **[.sentences().isExclamation()](#)** - return sentences with a `!`
- **[.sentences().isStatement()](#)** - return sentences without `?` or `!`
- **[.sentences().prepend()](#)** - smarter prepend that repairs whitespace + titlecasing
- **[.sentences().append()](#)** - smarter append that repairs sentence punctuation
- **[.sentences().toExclamation()](#)** -
- **[.sentences().toQuestion()](#)** -
- **[.sentences().toStatement()](#)** -
-
##### Syllables
`npm install compromise-syllables`
- **[.syllables()](https://observablehq.com/@spencermountain/compromise-syllables)** - split each term by its typical pronunciation
<!-- spacer -->
<div >
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
<hr/>
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
</div>
<!-- spacer -->
<div >
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
</div>
### Docs:
##### Tutorials:
- **[Tutorial #1](https://docs.compromise.cool/tutorial-1)** - Input → output
- **[Tutorial #2](https://docs.compromise.cool/compromise-tutorial-2)** - Match & transform
- **[Tutorial #3](https://docs.compromise.cool/compromise-making-a-bot)** - Making a chat-bot
<!-- * **[Tutorial #4]()** - Making a plugin -->
##### 3rd party:
- **[Geocoding Social Conversations with NLP and JavaScript](http://compromise.cool)** - by Microsoft
- **[Microservice Recipe](https://eventn.com/recipes/text-parsing-with-nlp-compromise)** - by Eventn
- **[Building Text-Based Games](https://killalldefects.com/2019/09/24/building-text-based-games-with-compromise-nlp/)** - by Matt Eland
- **[Fun with javascript in BigQuery](https://medium.com/@hoffa/new-in-bigquery-persistent-udfs-c9ea4100fd83#6e09)** - by Felipe Hoffa
##### Talks:
- **[Language as an Interface](https://www.youtube.com/watch?v=WuPVS2tCg8s)** - by Spencer Kelly
- **[Coding Chat Bots](https://www.youtube.com/watch?v=c_hmwFwvO0U)** - by KahWee Teng
<div align="center">
<img height="50px" src="https://user-images.githubusercontent.com/399657/68221824-09809d80-ffb8-11e9-9ef0-6ed3574b0ce8.png"/>
</div>
##### Some fun Applications:
- **[Chat dialogue framework](http://superscriptjs.com/)** - by Rob Ellis
- **[Automated Bechdel Test](https://github.com/guardian/bechdel-test)** - by The Guardian
- **[Story generation framework](https://perchance.org/welcome)** - by Jose Phrocca
- **[Tumblr blog of lists](https://leanstooneside.tumblr.com/)** - horse-ebooks-like lists - by Michael Paulukonis
- **[Video Editing from Transcription](https://newtheory.io/)** - by New Theory
- **[Browser extension Fact-checking](https://github.com/AlexanderKidd/FactoidL)** - by Alexander Kidd
- **[Siri shortcut](https://routinehub.co/shortcut/3260)** - by Michael Byrns
- **[Amazon skill](https://github.com/tajddin/voiceplay)** - by Tajddin Maghni
- **[Tasking Slack-bot](https://github.com/kevinsuh/toki)** - by Kevin Suh
<!-- spacer -->
<div align="center">
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
<hr/>
</div>
#### Limitations:
- **slash-support:**
We currently split slashes up as different words, like we do for hyphens. so things like this don't work:
<code>nlp('the koala eats/shoots/leaves').has('koala leaves') //false</code>
- **inter-sentence match:**
By default, sentences are the top-level abstraction.
Inter-sentence, or multi-sentence matches aren't supported:
<code>nlp("that's it. Back to Winnipeg!").has('it back')//false</code>
- **nested match syntax:**
the <s>danger</s> beauty of regex is that you can recurse indefinitely.
Our match syntax is much weaker. Things like this are not <i>(yet)</i> possible:
<code>doc.match('(modern (major|minor))? general')</code>
complex matches must be achieved with successive **.match()** statements.
- **dependency parsing:**
Proper sentence transformation requires understanding the [syntax tree](https://en.wikipedia.org/wiki/Parse_tree) of a sentence, which we don't currently do.
We should! Help wanted with this.
##### FAQ
<ul align="left">

@@ -458,10 +679,10 @@ <p>

<ul>
yeah!
yeah it is!
<br/>
it wasn't built to compete with the stanford tagger, and may not fit every project.
it wasn't built to compete with NLTK, and may not fit every project.
<br/>
string stuff is synchronous too, and parallelizing is weird.
string processing is synchronous too, and parallelizing node processes is weird.
<br/>
See <a href="https://beta.observablehq.com/@spencermountain/compromise-performance">here</a> for information about speed & performance, and
<a href="https://github.com/spencermountain/compromise/wiki/Justification">here></a> for project motivations
See <a href="https://observablehq.com/@spencermountain/compromise-performance">here</a> for information about speed & performance, and
<a href="https://observablehq.com/@spencermountain/compromise-justification">here</a> for project motivations
</ul>

@@ -478,3 +699,3 @@ <p></p>

<br/>
Read <a href="https://github.com/spencermountain/compromise/wiki/QuickStart">quickStart</a> for all sorts of funny environments.
Read <a href="https://observablehq.com/@spencermountain/compromise-quickstart">quick start</a> for running compromise in workers, mobile apps, and all sorts of funny environments.
</ul>

@@ -486,9 +707,8 @@ <p></p>

<details>
<summary>🌎 Other Languages?</summary>
<summary>🌎 Compromise in other Languages?</summary>
<p></p>
<ul>
okay! <br/>
we've got work-in-progress forks for <a href="https://github.com/nlp-compromise/de-compromise">German</a> and <a href="https://github.com/nlp-compromise/fr-compromise">French</a>, in the same philosophy.
<br/>
Get involved!
and need some help.
</ul>

@@ -503,8 +723,7 @@ <p></p>

<ul>
compromise is one function so can't really be tree-shaken.
<br/> .. and the tagging methods are competitive, so it's not recommended to pull things out.
<br/>
It's best to load the library fully, given it's smaller than <a href="https://68.media.tumblr.com/tumblr_m674jlpyPT1ry8fquo1_250.gif">this gif</a>.
<br/>
A plug-in scheme is in the works.
compromise isn't easily tree-shaken.
<br/>
the tagging methods are competitive, and greedy, so it's not recommended to pull things out.
<br/>
It's recommended to run the library fully.
</ul>

@@ -516,23 +735,18 @@ <p></p>

<hr/>
<div align="center">
<img src="https://user-images.githubusercontent.com/399657/68221731-e8b84800-ffb7-11e9-8453-6395e0e903fa.png"/>
</div>
### Also:
* &nbsp; **[naturalNode](https://github.com/NaturalNode/natural)** - fancier statistical nlp in javascript
* &nbsp; **[superScript](http://superscriptjs.com/)** - clever conversation engine in js
* &nbsp; **[nodeBox Linguistics](https://www.nodebox.net/code/index.php/Linguistics)** - conjugation, inflection in javascript
* &nbsp; **[reText](https://github.com/wooorm/retext)** - very impressive [text utilities](https://github.com/wooorm/retext/blob/master/doc/plugins.md) in javascript
* &nbsp; **[jsPos](https://code.google.com/archive/p/jspos/)** - javascript build of the time-tested Brill-tagger
* &nbsp; **[spaCy](https://spacy.io/)** - speedy, multilingual tagger in C/python
#### See Also:
For the former promise-library, see [jnewman/compromise](https://github.com/jnewman/compromise)
(Thanks [Joshua](https://github.com/jnewman)!)
- &nbsp; **[naturalNode](https://github.com/NaturalNode/natural)** - fancier statistical nlp in javascript
- &nbsp; **[superScript](http://superscriptjs.com/)** - clever conversation engine in js
- &nbsp; **[nodeBox linguistics](https://www.nodebox.net/code/index.php/Linguistics)** - conjugation, inflection in javascript
- &nbsp; **[reText](https://github.com/wooorm/retext)** - very impressive [text utilities](https://github.com/wooorm/retext/blob/master/doc/plugins.md) in javascript
- &nbsp; **[jsPos](https://code.google.com/archive/p/jspos/)** - javascript build of the time-tested Brill-tagger
- &nbsp; **[spaCy](https://spacy.io/)** - speedy, multilingual tagger in C/python
<div align="right">
(and don't forget 🙇
<a href="http://www.nltk.org/">NLTK</a>,
<a href="https://gate.ac.uk">GATE</a>,
<a href="http://nlp.stanford.edu/software/lex-parser.shtml">Stanford</a>,
and
<a href="http://cogcomp.cs.illinois.edu/page/software/">Illinois</a> libs
)
</div>
<img height="25px" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/>
<b>MIT</b>
<div align="right">by <a href="https://spencermounta.in/">spencermountain</a></div>

@@ -1,189 +0,227 @@

declare function compromise(text: string, lexicon?: { [word: string]: string }): compromise.Text;
declare namespace compromise {
const version: string;
function verbose(str: any): void;
function tokenize(str: string): any;
function plugin(obj: any): any;
function clone(): any;
function unpack(...args: any[]): void;
function addWords(...args: any[]): void;
function addTags(...args: any[]): void;
function addRegex(...args: any[]): void;
function addPatterns(...args: any[]): void;
function addPlurals(...args: any[]): void;
function addConjugations(conj: { [verb: string]: Conjugation }): void;
export as namespace nlp
interface Conjugation {
Gerund?: string;
PresentTense?: string;
PastTense?: string;
PerfectTense?: string;
PluPerfectTense?: string;
FuturePerfect?: string;
Actor?: string;
}
declare function nlp(text: string): nlp.Document
interface Text {
/** did it find anything? */
readonly found: boolean;
/** just a handy wrap */
readonly parent: Text;
/** how many Texts are there? */
readonly length: number;
/** nicer than constructor.call.name or whatever */
readonly isA: 'Text';
/** the whitespace before and after this match */
readonly whitespace: {
before(str: string): Text,
after(str: string): Text
};
// Constructor
declare module nlp {
/** parse text into a compromise object, without running POS-tagging */
export function tokenize(text: string): Document
/** mix in a compromise-plugin */
export function extend(plugin: any): Document
/** make a deep-copy of the library state */
export function clone(): Document
/** re-generate a Doc object from .json() results */
export function load(json: any): Document
/** log our decision-making for debugging */
export function verbose(bool: boolean): Document
/** current semver version of the library */
export const version: Document
acronyms(...args: any[]): any;
adjectives(...args: any[]): any;
adverbs(...args: any[]): any;
contractions(...args: any[]): any;
dates(...args: any[]): any;
nouns(...args: any[]): any;
people(...args: any[]): any;
sentences(...args: any[]): any;
terms(...args: any[]): any;
possessives(...args: any[]): any;
values(...args: any[]): any;
verbs(...args: any[]): any;
ngrams(...args: any[]): any;
startGrams(...args: any[]): any;
endGrams(...args: any[]): any;
words(...args: any[]): any;
class Document {
// Utils
/** return the whole original document ('zoom out') */
all(): Document
/** does this document contain any matches? (false when it is empty) */
found: Boolean
/** return the previous result */
parent(): Document
/** return all of the previous results */
parents(): Document[]
/** (re)run the part-of-speech tagger on this document */
tagger(): Document
/** count the # of terms in each match */
wordCount(): Number
/** count the # of characters of each match */
length(): Number
/** deep-copy the document, so that no references remain */
clone(shallow?: Boolean): Document
/** freeze the current state of the document, for speed-purposes */
cache(options?: Object): Document
/** un-freezes the current state of the document, so it may be transformed */
uncache(options?: Object): Document
// misc
all(...args: any[]): any;
index(...args: any[]): any;
wordCount(...args: any[]): any;
data(...args: any[]): any;
/* javascript array loop-wrappers */
map(...args: any[]): any;
forEach(...args: any[]): any;
filter(...args: any[]): any;
reduce(...args: any[]): any;
find(...args: any[]): any;
/** copy data properly so later transformations will have no effect */
clone(...args: any[]): any;
// Accessors
/** use only the first result(s) */
first(n?: Number): Document
/** use only the last result(s) */
last(n?: Number): Document
/** grab a subset of the results */
slice(start: Number, end?: Number): Document
/** use only the nth result */
eq(n: Number): Document
/** get the first word in each match */
firstTerm(): Document
/** get the end word in each match */
lastTerm(): Document
/** return a flat list of all Term objects in match */
termList(): any
/** get the nth term of each result */
term(...args: any[]): any;
firstTerm(...args: any[]): any;
lastTerm(...args: any[]): any;
// Match
/** return a new Doc, with this one as a parent */
match(match: String | Document): Document
/** return all results except for this */
not(match: String | Document): Document
/** return only the first match */
matchOne(match: String | Document): Document
/** return each current phrase, only if it contains this match */
if(match: String | Document): Document
/** Filter-out any current phrases that have this match */
ifNo(match: String | Document): Document
/** Return a boolean if this match exists */
has(match: String | Document): Document
/** search through earlier terms, in the sentence */
lookBehind(match: String | Document): Document
/** search through following terms, in the sentence */
lookAhead(match: String | Document): Document
/** return the terms before each match */
before(match: String | Document): Document
/** return the terms after each match */
after(match: String | Document): Document
/** quick find for an array of string matches */
lookup(matches: String[]): Document
/** grab a subset of the results */
slice(...args: any[]): any;
// Case
/** turn every letter of every term to lower case */
toLowerCase(): Document
/** turn every letter of every term to upper case */
toUpperCase(): Document
/** upper-case the first letter of each term */
toTitleCase(): Document
/** remove whitespace and title-case each term */
toCamelCase(): Document
/** use only the nth result */
get(...args: any[]): any;
/** use only the first result */
first(...args: any[]): any;
/** use only the last result */
last(...args: any[]): any;
// Whitespace
/** add this punctuation or whitespace before each match */
pre(str: String): Document
/** add this punctuation or whitespace after each match */
post(str: String): Document
/** remove start and end whitespace */
trim(): Document
/** connect words with hyphen, and remove whitespace */
hyphenate(): Document
/** remove hyphens between words, and set whitespace */
dehyphenate(): Document
concat(...args: any[]): any;
// Tag
/** Give all terms the given tag */
tag(tag: String, reason?: String): Document
/** Only apply tag to terms if it is consistent with current tags */
tagSafe(tag: String, reason?: String): Document
/** Remove this tag from the given terms */
unTag(tag: String, reason?: String): Document
/** return only the terms that can be this tag */
canBe(tag: String): Document
/** make it into one sentence/termlist */
flatten(...args: any[]): any;
// Loops
/** run each phrase through a function, and create a new document */
map(fn: Function): Document | []
/** run a function on each phrase, as an individual document */
forEach(fn: Function): Document
/** return only the phrases that return true */
filter(fn: Function): Document
/** return a document with only the first phrase that matches */
find(fn: Function): Document | undefined
/** return true or false if there is one matching phrase */
some(fn: Function): Document
/** sample a subset of the results */
random(n?: Number): Document
/** see if these terms can become this tag */
canBe(...args: any[]): any;
// Insert
/** substitute-in new content */
replaceWith(text: String, keepTags?: Boolean): Document
/** search and replace match with new content */
replace(match: String, text?: String, keepTags?: Boolean): Document
/** fully remove these terms from the document */
delete(match: String): Document
/** add these new terms to the end (insertAfter) */
append(text: String): Document
/** add these new terms to the front (insertBefore) */
prepend(text: String): Document
/** add these new things to the end */
concat(text: String): Document
/** sample part of the array */
random(...args: any[]): any;
setPunctuation(...args: any[]): any;
getPunctuation(...args: any[]): any;
// jquery-like api aliases
offset(...args: any[]): any;
text(...args: any[]): any;
eq(...args: any[]): any;
join(...args: any[]): any;
// transform
/**re-arrange the order of the matches (in place) */
sort(method?: String | Function): Document
/**reverse the order of the matches, but not the words */
reverse(): Document
/** clean-up the document, in various ways */
normalize(options?: String | Object): String
/** remove any duplicate matches */
unique(): Document
/** return a Document with three parts for every match ('splitOn') */
split(match?: String): Document
/** separate everything after the match as a new phrase */
splitBefore(match?: String): Document
/** separate everything before the word, as a new phrase */
splitAfter(match?: String): Document
/** split a document into labeled sections */
segment(regs: Object, options?: Object): Document
/** make all phrases into one phrase */
join(str?: String): Document
// loops
toTitleCase(...args: any[]): any;
toUpperCase(...args: any[]): any;
toLowerCase(...args: any[]): any;
toCamelCase(...args: any[]): any;
// Output
/** return the document as text */
text(options?: String | Object): String
/** pull out desired metadata from the document */
json(options?: String | Object): any
/** some named output formats */
out(format?: string): String
/** pretty-print the current document and its tags */
debug(): Document
/** store a parsed document for later use */
export(): any
hyphenate(...args: any[]): any;
dehyphenate(...args: any[]): any;
trim(...args: any[]): any;
// Selections
/** split-up results by each individual term */
terms(n?: Number): Document
/** split-up results into multi-term phrases */
clauses(n?: Number): Document
/** return all terms connected with a hyphen or dash like `'wash-out'`*/
hyphenated(n?: Number): Document
/** return things like `'(939) 555-0113'` */
phoneNumbers(n?: Number): Document
/** return things like `'#nlp'` */
hashTags(n?: Number): Document
/** return things like `'hi@compromise.cool'` */
emails(n?: Number): Document
/** return things like `:)` */
emoticons(n?: Number): Document
/** return things like `💋` */
emoji(n?: Number): Document
/** return things like `'@nlp_compromise'`*/
atMentions(n?: Number): Document
/** return things like `'compromise.cool'` */
urls(n?: Number): Document
/** return things like `'quickly'` */
adverbs(n?: Number): Document
/** return things like `'he'` */
pronouns(n?: Number): Document
/** return things like `'but'`*/
conjunctions(n?: Number): Document
/** return things like `'of'`*/
prepositions(n?: Number): Document
/** return things like `'Mrs.'`*/
abbreviations(n?: Number): Document
insertBefore(...args: any[]): any;
insertAfter(...args: any[]): any;
insertAt(...args: any[]): any;
replace(...args: any[]): any;
replaceWith(...args: any[]): any;
delete(...args: any[]): any;
lump(...args: any[]): any;
tagger(...args: any[]): any;
tag(...args: any[]): any;
unTag(...args: any[]): any;
// match
/** do a regex-like search through terms and return a subset */
match(...args: any[]): any;
not(...args: any[]): any;
if(...args: any[]): any;
ifNo(...args: any[]): any;
has(...args: any[]): any;
/** find a match and return everything in front of it */
before(...args: any[]): any;
/** find a match and return everything after it */
after(...args: any[]): any;
// alias 'and'
and(...args: any[]): any;
notIf(...args: any[]): any;
only(...args: any[]): any;
onlyIf(...args: any[]): any;
// out
out(...args: any[]): any;
debug(...args: any[]): any;
// sort
/** reorder result.list alphabetically */
sort(...args: any[]): any;
/** reverse the order of result.list */
reverse(...args: any[]): any;
unique(...args: any[]): any;
// split
/** turn result into two separate results */
splitAfter(...args: any[]): any;
/** turn result into two separate results */
splitBefore(...args: any[]): any;
/** turn result into two separate results */
splitOn(...args: any[]): any;
// normalize
normalize(...args: any[]): any;
// subsets
clauses(...args: any[]): any;
hashTags(...args: any[]): any;
organizations(...args: any[]): any;
phoneNumbers(...args: any[]): any;
places(...args: any[]): any;
quotations(...args: any[]): any;
topics(...args: any[]): any;
urls(...args: any[]): any;
questions(...args: any[]): any;
statements(...args: any[]): any;
parentheses(...args: any[]): any;
// Subsets
/** return any multi-word terms, like "didn't" */
contractions(n?: Number): Document
/** return anything inside (parentheses) */
parentheses(n?: Number): Document
/** return things like "Spencer's" */
possessives(n?: Number): Document
/** return any terms inside 'quotation marks' */
quotations(n?: Number): Document
/** return things like `'FBI'` */
acronyms(n?: Number): Document
/** return things like `'eats, shoots, and leaves'` */
lists(n?: Number): Document
/** return any subsequent terms tagged as a Noun */
nouns(n?: Number): Document
/** return any subsequent terms tagged as a Verb */
verbs(n?: Number): Document
}
}
export = compromise;
export default nlp

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc