compromise
Advanced tools
Comparing version 12.3.0 to 13.0.0-rc1
@@ -11,2 +11,38 @@ compromise uses semver, and pushes to npm frequently | ||
##### 13.0.0 | ||
_major changes to `.export()` and `[capture] group` match-syntax._ | ||
- **[breaking]** move .export() and .load() methods to plugin (compromise-export) | ||
- - change .export() format - this hasn't worked properly since v12. (mis-parsed contractions) see #669 | ||
- **[breaking]** split `compromise-output` into `compromise-html` and `compromise-hash` plugins | ||
- **[breaking]** `.match('foo [bar]')` no-longer returns 'bar'. (use `.match('foo [bar]', 0)`) | ||
- **[breaking]** change `.sentences()` method to return only full-sentences of matches (use `.all()` instead) | ||
modifications: | ||
- fix nlp.clone() - hasn't worked properly since v12. (@Drache93) | ||
- fix issues with greedy capture [*] and [.+] -(@Drache93) 💛 | ||
- add whitespace properties (pre+post) to default json output (suppress with `.json({whitespace:false})`) | ||
- `.lookup({key:val})` with an object now returns an object back ({val: Doc}) | ||
- add nlp constructor as a third param to `.extend()` | ||
- support lexicon object param in tokenize - `.tokenize('my word', {word:'tag'})` | ||
- clean-up of scripts and tooling | ||
- improved typescript types | ||
- add support for some french contractions like `j'aime -> je aime` | ||
- allow null results in `.map()` function | ||
new things: | ||
- add new named-match syntax, with .byName() method (@Drache93) | ||
- add `nlp.fromJSON()` method | ||
- add a new `compromise-tokenize.js` build, without the tagger, or data included. | ||
##### 12.4.0 | ||
- adds proper `nlp.clone()` support (thanks @Drache93!) | ||
- better typescript support | ||
- allow longer acronyms | ||
- fix offset length issue | ||
##### 12.3.0 | ||
@@ -25,3 +61,3 @@ | ||
- verb conjugation and tagger bugfixes | ||
- disambiguate acryonyms & yelling | ||
- disambiguate between acronyms & yelling | ||
@@ -28,0 +64,0 @@ ##### 12.2.1 |
@@ -5,3 +5,3 @@ { | ||
"description": "modest natural language processing", | ||
"version": "12.3.0", | ||
"version": "13.0.0-rc1", | ||
"main": "./builds/compromise.js", | ||
@@ -17,21 +17,25 @@ "unpkg": "./builds/compromise.min.js", | ||
}, | ||
"engines": { | ||
"node": ">=6.0.0" | ||
}, | ||
"scripts": { | ||
"build": "npm run version && rollup -c && npm run filesize", | ||
"build:all": "node ./scripts/buildAll.js && npm run build", | ||
"pack": "node ./scripts/pack.js", | ||
"version": "node ./scripts/version.js", | ||
"test": "node ./scripts/testAll.js", | ||
"testb": "TESTENV=prod node ./scripts/testAll.js", | ||
"testOne": "tape \"./tests/**/*.test.js\" | tap-dancer", | ||
"build": "node ./scripts/build && rollup -c --silent && npm run test:smoke --silent", | ||
"pack": "node ./scripts/build/pack.js", | ||
"test": "node ./scripts/test/index.js ", | ||
"testb": "npm run test:smoke && TESTENV=prod node ./scripts/test/index.js", | ||
"test:smoke": "node \"./scripts/test/smoke-test/\" | tap-dancer", | ||
"test:spec": "tape \"./tests/**/*.test.js\" | tap-spec", | ||
"filesize": "node ./scripts/filesize.js", | ||
"test:perf": "node ./scripts/test/speed/index.js", | ||
"test:types": "ts-node ./scripts/test/types.ts | tap-dancer", | ||
"test:stress": "node ./scripts/test/stress.js", | ||
"coverage": "node ./scripts/test/coverage.js", | ||
"coverage:html": "nyc --reporter=html tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"lint": "eslint ./src/ && eslint ./plugins/**/src/", | ||
"watch": "amble ./scratch.js", | ||
"stress": "node ./scripts/stress-test/stress.js", | ||
"plugins": "node ./scripts/plugin-check.js", | ||
"speed": "node ./scripts/stress-test/speed.js", | ||
"demo": "python -m SimpleHTTPServer 8888", | ||
"coverage": "node ./scripts/coverage.js", | ||
"coverage:html": "nyc --reporter=html tape \"./tests/**/*.test.js\" | tap-dancer --color always", | ||
"test:types": "ts-node ./types/types.test.ts | tap-dancer", | ||
"lint": "eslint ./src/" | ||
"build:all": "node ./scripts/build/build-all.js && npm run build --silent", | ||
"plugins": "node ./scripts/plugin-check.js --silent", | ||
"plugins:install": "node ./scripts/plugins.js npm install", | ||
"plugins:ci": "node ./scripts/plugins.js npm ci", | ||
"plugins:build": "node ./scripts/plugins.js npm run build", | ||
"demo": "python -m SimpleHTTPServer 8888" | ||
}, | ||
@@ -53,19 +57,21 @@ "files": [ | ||
"devDependencies": { | ||
"@babel/core": "7.7.7", | ||
"@babel/preset-env": "7.7.7", | ||
"@babel/core": "7.8.3", | ||
"@babel/preset-env": "7.8.3", | ||
"@rollup/plugin-alias": "^3.0.0", | ||
"@rollup/plugin-commonjs": "^11.0.1", | ||
"@rollup/plugin-json": "^4.0.1", | ||
"@rollup/plugin-node-resolve": "^7.0.0", | ||
"amble": "0.0.7", | ||
"efrt": "2.2.1", | ||
"rollup": "1.28.0", | ||
"efrt": "2.2.2", | ||
"rollup": "1.30.1", | ||
"rollup-plugin-babel": "4.3.3", | ||
"rollup-plugin-commonjs": "10.1.0", | ||
"rollup-plugin-json": "4.0.0", | ||
"rollup-plugin-node-resolve": "5.2.0", | ||
"rollup-plugin-terser": "^5.1.3", | ||
"rollup-plugin-filesize-check": "0.0.1", | ||
"rollup-plugin-terser": "5.2.0", | ||
"shelljs": "0.8.3", | ||
"tap-dancer": "0.2.0", | ||
"tape": "4.12.1" | ||
"tape": "4.13.0" | ||
}, | ||
"eslintIgnore": [ | ||
"builds/*.js", | ||
"types/*.ts", | ||
"*.ts", | ||
"plugins/**/types/*.ts" | ||
@@ -72,0 +78,0 @@ ], |
@@ -29,2 +29,5 @@ <div align="center"> | ||
</a> | ||
<a href="https://spectrum.chat/nlp-compromise"> | ||
<img src="https://img.shields.io/badge/spectrum-chat-%23b14344" /> | ||
</a> | ||
</div> | ||
@@ -225,2 +228,15 @@ </div> | ||
or if you don't care about POS-tagging, you can use the tokenize-only build: (90kb!) | ||
```html | ||
<script src="https://unpkg.com/compromise/builds/compromise-tokenize.js"></script> | ||
<script> | ||
var doc = nlp('No, my son is also named Bort.') | ||
//you can see the text has no tags | ||
console.log(doc.has('#Noun')) //false | ||
//but the whole api still works | ||
console.log(doc.has('my .* is .? named /^b[oa]rt/')) //true | ||
</script> | ||
``` | ||
<!-- spacer --> | ||
@@ -323,3 +339,2 @@ <img height="30" src="https://user-images.githubusercontent.com/399657/68221862-17ceb980-ffb8-11e9-87d4-7b30b6488f16.png"/> | ||
- **[.extend()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - mix in a compromise-plugin | ||
- **[.load()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - re-generate a Doc object from .export() results | ||
- **[.verbose()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - log our decision-making for debugging | ||
@@ -347,5 +362,8 @@ - **[.version()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - current semver version of the library | ||
- **[.eq(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the nth result | ||
- **[.firstTerm()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the first word in each match | ||
- **[.lastTerm()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the end word in each match | ||
- **[.terms()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up results by each individual term | ||
- **[.firstTerms()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the first word in each match | ||
- **[.lastTerms()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the end word in each match | ||
- **[.sentences()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the whole sentence for each match | ||
- **[.termList()](https://observablehq.com/@spencermountain/compromise-accessors)** - return a flat list of all Term objects in match | ||
- **[.groups('')](https://observablehq.com/@spencermountain/compromise-accessors)** - grab any named capture-groups from a match | ||
@@ -428,7 +446,5 @@ ##### Match | ||
- **[.debug()](https://observablehq.com/@spencermountain/compromise-output)** - pretty-print the current document and its tags | ||
- **[.export()](https://observablehq.com/@spencermountain/compromise-export)** - store a parsed document for later use | ||
##### Selections | ||
- **[.terms()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up results by each individual term | ||
- **[.clauses()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up sentences into multi-term phrases | ||
@@ -547,2 +563,29 @@ - **[.hyphenated()](https://observablehq.com/@spencermountain/compromise-selections)** - all terms connected with a hyphen or dash like `'wash-out'` | ||
##### Export | ||
`npm install compromise-export` | ||
- **[.export()](https://observablehq.com/@spencermountain/compromise-export)** - store a parsed document for later use | ||
- **[nlp.load()](https://observablehq.com/@spencermountain/compromise-export)** - re-generate a Doc object from .export() results | ||
##### Html | ||
`npm install compromise-html` | ||
- **[.html({})](https://observablehq.com/@spencermountain/compromise-html)** - generate sanitized html from the document | ||
##### Hash | ||
`npm install compromise-hash` | ||
- **[.hash()](https://observablehq.com/@spencermountain/compromise-hash)** - generate an md5 hash from the document+tags | ||
- **[.isEqual(doc)](https://observablehq.com/@spencermountain/compromise-hash)** - compare the hash of two documents for semantic-equality | ||
##### Keypress | ||
`npm install compromise-keypress` | ||
- **[nlp.keypress('')](https://observablehq.com/@spencermountain/compromise-keypress)** - generate an md5 hash from the document+tags | ||
- **[nlp.clear('')](https://observablehq.com/@spencermountain/compromise-keypress)** - clean-up any cached sentences from memory | ||
##### Ngrams | ||
@@ -560,9 +603,2 @@ | ||
##### Output | ||
`npm install compromise-output` | ||
- **[.hash()](#)** - generate an md5 hash from the document+tags | ||
- **[.html({})]()** - generate sanitized html from the document | ||
##### Paragraphs | ||
@@ -744,6 +780,10 @@ | ||
<ul> | ||
compromise isn't easily tree-shaken. | ||
we do offer a [compromise-tokenize](./builds/compromise-tokenize.js) build, which has the POS-tagger pulled-out. | ||
<br/> | ||
but otherwise, compromise isn't easily tree-shaken. | ||
<br/> | ||
the tagging methods are competitive, and greedy, so it's not recommended to pull things out. | ||
<br/> | ||
Note that without a full POS-tagging, the contraction-parser won't work perfectly. (<i>(spencer's cool)</i> vs. <i>(spencer's house)</i>) | ||
<br/> | ||
It's recommended to run the library fully. | ||
@@ -750,0 +790,0 @@ </ul> |
export as namespace nlp | ||
// a key-value object of words, terms | ||
declare interface Lexicon { | ||
[key: string]: string | ||
} | ||
// documents indexed by a string | ||
declare interface DocIndex<W extends nlp.World = nlp.World> { | ||
[key: string]: nlp.Document<W> | ||
} | ||
declare interface nlp<D extends object, W extends object> { | ||
/** normal usage */ | ||
(text: string): nlp.ExtendedDocument<D, W> | ||
(text?: string, lexicon?: Lexicon): nlp.ExtendedDocument<D, W> | ||
/** tokenize string */ | ||
tokenize(text: string): nlp.ExtendedDocument<D, W> | ||
tokenize(text: string, lexicon?: Lexicon): nlp.ExtendedDocument<D, W> | ||
/** mix in a compromise-plugin */ | ||
@@ -17,5 +26,7 @@ extend<P>( | ||
/** re-generate a Doc object from .json() results */ | ||
load(json: any): nlp.ExtendedDocument<D, W> | ||
fromJSON(json: any): nlp.ExtendedDocument<D, W> | ||
/** log our decision-making for debugging */ | ||
verbose(bool: boolean): nlp.ExtendedDocument<D, W> | ||
verbose(bool?: boolean): nlp.ExtendedDocument<D, W> | ||
/** create instance using global world*/ | ||
clone(): nlp<D, W> | ||
/** current semver version of the library */ | ||
@@ -25,4 +36,4 @@ version: nlp.ExtendedDocument<D, W> | ||
declare function nlp(text: string): nlp.DefaultDocument | ||
declare function nlp<D extends object, W extends object>(text: string): nlp.ExtendedDocument<D, W> | ||
declare function nlp(text?: string, lexicon?: Lexicon): nlp.DefaultDocument | ||
declare function nlp<D extends object, W extends object>(text?: string): nlp.ExtendedDocument<D, W> | ||
@@ -61,5 +72,20 @@ // possible values to .json() | ||
// Cleaner plugin types | ||
type PluginWorld<D extends object, W extends object> = { | ||
// Override post process type | ||
postProcess(process: (Doc: nlp.ExtendedDocument<D, W>) => void): nlp.ExtendedWorld<W> | ||
} & nlp.ExtendedWorld<W> | ||
type PluginDocument<D extends object, W extends object> = nlp.ExtendedDocument<D, W> & { prototype: D } | ||
type PluginTerm = nlp.Term & PluginConstructor | ||
// Make these available, full support tbd | ||
type PluginConstructor = { | ||
prototype: Record<string, any> | ||
} | ||
// Constructor | ||
declare module nlp { | ||
export function tokenize(text: string): DefaultDocument | ||
export function tokenize(text?: string, lexicon?: Lexicon): DefaultDocument | ||
/** mix in a compromise-plugin */ | ||
@@ -70,5 +96,7 @@ export function extend<P>( | ||
/** re-generate a Doc object from .json() results */ | ||
export function load(json: any): DefaultDocument | ||
export function fromJSON(json: any): DefaultDocument | ||
/** log our decision-making for debugging */ | ||
export function verbose(bool: boolean): DefaultDocument | ||
export function verbose(bool?: boolean): DefaultDocument | ||
/** create instance using global world */ | ||
export function clone(): nlp<{}, {}> | ||
/** current semver version of the library */ | ||
@@ -78,4 +106,8 @@ export const version: number | ||
type Plugin<D extends object, W extends object> = ( | ||
Doc: Document<World & W> & D & { prototype: D }, | ||
world: World & W | ||
Doc: PluginDocument<D, W>, | ||
world: PluginWorld<D, W>, | ||
nlp: nlp<D, W>, | ||
Phrase: PluginConstructor, | ||
Term: PluginTerm, // @todo Add extend support | ||
Pool: PluginConstructor | ||
) => void | ||
@@ -126,7 +158,11 @@ | ||
/** get the first word in each match */ | ||
firstTerm(): Document<W> | ||
firstTerms(): Document<W> | ||
/** get the end word in each match */ | ||
lastTerm(): Document<W> | ||
lastTerms(): Document<W> | ||
/** return a flat list of all Term objects in match */ | ||
termList(): any | ||
termList(): Term[] | ||
/** grab a specific named capture group */ | ||
groups(name: string): Document<W> | ||
/** grab all named capture groups */ | ||
groups(): DocIndex<W> | ||
@@ -156,2 +192,4 @@ // Match | ||
lookup(matches: string[]): Document<W> | ||
/** quick find for an object of key-value matches */ | ||
lookup(matches: Lexicon): DocIndex<W> | ||
@@ -298,3 +336,3 @@ // Case | ||
// Subsets | ||
/** alias for .all(), until plugin overloading */ | ||
/** get the whole sentence for each match */ | ||
sentences(): Document<W> | ||
@@ -418,5 +456,173 @@ /** return things like `'Mrs.'`*/ | ||
class World {} | ||
class World { | ||
/** more logs for debugging */ | ||
verbose(on?: boolean): this | ||
isVerbose(): boolean | ||
/** get all terms in our lexicon with this tag */ | ||
getByTag(tag: string): Record<string, true> | ||
/** put new words into our lexicon, properly */ | ||
addWords(words: Record<string, string>): void | ||
/** extend the compromise tagset */ | ||
addTags( | ||
tags: Record< | ||
string, | ||
{ | ||
isA?: string | string[] | ||
notA?: string | string[] | ||
} | ||
> | ||
): void | ||
/** call methods after tagger runs */ | ||
postProcess<D extends Document = Document>(process: (Doc: D) => void): this | ||
} | ||
// @todo | ||
interface RegSyntax { | ||
[index: string]: any | ||
} | ||
type TextOutOptions = | ||
| 'reduced' | ||
| 'root' | ||
| 'implicit' | ||
| 'normal' | ||
| 'unicode' | ||
| 'titlecase' | ||
| 'lowercase' | ||
| 'acronyms' | ||
| 'whitespace' | ||
| 'punctuation' | ||
| 'abbreviations' | ||
type JsonOutOptions = 'text' | 'normal' | 'tags' | 'clean' | 'id' | 'offset' | 'implicit' | 'whitespace' | 'bestTag' | ||
class Term { | ||
isA: 'Term' // Get Type | ||
id: string | ||
// main data | ||
text: string | ||
tags: Record<string, boolean> | ||
// alternative forms of this.text | ||
root: string | null | ||
implicit: string | null | ||
clean?: string | ||
reduced?: string | ||
// additional surrounding information | ||
prev: string | null // id of prev term | ||
next: string | null // id of next term | ||
pre?: string // character before e.g. ' ' ',' | ||
post?: string // character after e.g. ' ' ',' | ||
// support alternative matches | ||
alias?: string | ||
constructor(text?: string) | ||
set(text: string): this | ||
/** clone contents to new term */ | ||
clone(): Term | ||
/** convert all text to uppercase */ | ||
toUpperCase(): this | ||
/** convert all text to lowercase */ | ||
toLowerCase(): this | ||
/** only set the first letter to uppercase | ||
* leave any existing uppercase alone | ||
*/ | ||
toTitleCase(): this | ||
/** if all letters are uppercase */ | ||
isUpperCase(): this | ||
/** if the first letter is uppercase, and the rest are lowercase */ | ||
isTitleCase(): this | ||
titleCase(): this | ||
/** search the term's 'post' punctuation */ | ||
hasPost(): boolean | ||
/** search the term's 'pre' punctuation */ | ||
hasPre(): boolean | ||
/** does it have a quotation symbol? */ | ||
hasQuote(): boolean | ||
hasQuotation(): boolean | ||
/** does it have a comma? */ | ||
hasComma(): boolean | ||
/** does it end in a period? */ | ||
hasPeriod(): boolean | ||
/** does it end in an exclamation */ | ||
hasExclamation(): boolean | ||
/** does it end with a question mark? */ | ||
hasQuestionMark(): boolean | ||
/** is there a ... at the end? */ | ||
hasEllipses(): boolean | ||
/** is there a semicolon after this word? */ | ||
hasSemicolon(): boolean | ||
/** is there a slash '/' in this word? */ | ||
hasSlash(): boolean | ||
/** a hyphen connects two words like-this */ | ||
hasHyphen(): boolean | ||
/** a dash separates words - like that */ | ||
hasDash(): boolean | ||
/** is it multiple words combined */ | ||
hasContraction(): boolean | ||
/** try to sensibly put this punctuation mark into the term */ | ||
addPunctuation(punct: string): this | ||
doesMatch(reg: RegSyntax, index: number, length: number): boolean | ||
/** does this term look like an acronym? */ | ||
isAcronym(): boolean | ||
/** is this term implied by a contraction? */ | ||
isImplicit(): boolean | ||
/** does the term have at least one good tag? */ | ||
isKnown(): boolean | ||
/** cache the root property of the term */ | ||
setRoot(world: World): void | ||
/** return various text formats of this term */ | ||
textOut(options?: Record<TextOutOptions, boolean>, showPre?: boolean, showPost?: boolean): string | ||
/** return various metadata for this term */ | ||
// @todo create output type from options... | ||
json(options?: Record<JsonOutOptions, boolean>, world?: World): object | ||
/** add a tag or tags, and their descendants to this term */ | ||
tag(tags: string | string[], reason?: string, world?: World): this | ||
/** only tag this term if it's consistent with its current tags */ | ||
tagSafe(tags: string | string[], reason?: string, world?: World): this | ||
/** remove a tag or tags, and their descendants from this term */ | ||
unTag(tags: string | string[], reason?: string, world?: World): this | ||
/** is this tag consistent with the word's current tags? */ | ||
canBe(tags: string | string[], world?: World): boolean | ||
} | ||
} | ||
export default nlp |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
QualityPackage is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1744889
10
21817
805
15
2