Comparing version 1.12.3 to 1.13.0
@@ -1,1 +0,1 @@ | ||
{"processes":{"ecaabe35-e24e-4d7b-931f-1f56d35101ff":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"]},"externalIds":{}} | ||
{"processes":{"64286668-c8e7-4a6e-b8a7-c1395bfb6e04":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/identify-marked-area.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js
":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["64286668-c8e7-4a6e-b8a7-c1395bfb6e04"]},"externalIds":{}} |
@@ -0,1 +1,9 @@ | ||
# [Improving mark's functionality in custom entities](https://github.com/winkjs/wink-nlp/releases/tag/1.13.0) | ||
## Version 1.13.0 December 09, 2022 | ||
### ✨ Features | ||
- Mark allows marking w.r.t. the last element of the pattern. For example if a pattern matches `a fluffy cat` then `mark: [-2, -1]` will extract `fluffy cat` — especially useful when the match length is unknown. 💃 | ||
- Improved error handling while processing mark's arguments. 🙌 | ||
# [Operational update](https://github.com/winkjs/wink-nlp/releases/tag/1.12.3) | ||
@@ -2,0 +10,0 @@ ## Version 1.12.3 November 18, 2022 |
{ | ||
"name": "wink-nlp", | ||
"version": "1.12.3", | ||
"version": "1.13.0", | ||
"description": "Developer friendly Natural Language Processing ✨", | ||
@@ -49,8 +49,8 @@ "keywords": [ | ||
"benchmark": "^2.1.4", | ||
"chai": "^4.3.6", | ||
"chai": "^4.3.7", | ||
"coveralls": "^3.1.1", | ||
"docker": "^1.0.0", | ||
"dtslint": "^4.2.1", | ||
"eslint": "^8.25.0", | ||
"mocha": "^10.0.0", | ||
"eslint": "^8.29.0", | ||
"mocha": "^10.1.0", | ||
"nyc": "^15.1.0" | ||
@@ -57,0 +57,0 @@ }, |
@@ -15,5 +15,5 @@ # winkNLP | ||
## Build amazing apps quickly | ||
| [Wikipedia article timeline](https://winkjs.org/showcase-timeline/) | [Context aware word cloud](https://observablehq.com/@winkjs/how-to-create-a-context-aware-word-cloud) | [Key sentences detection](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document) | | ||
| [Wikipedia article timeline](https://observablehq.com/@winkjs/how-to-visualize-timeline-of-a-wiki-article) | [Context aware word cloud](https://observablehq.com/@winkjs/how-to-create-a-context-aware-word-cloud) | [Key sentences detection](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document) | | ||
| --- | --- | --- | | ||
| [<img src="https://user-images.githubusercontent.com/29990/202497363-19c30578-8146-4f36-9c4b-4de613610837.png">](https://winkjs.org/showcase-timeline/)| [<img src="https://user-images.githubusercontent.com/29990/202506181-1a926ee0-788f-4aa1-aeac-a097f09fe747.png">](https://observablehq.com/@winkjs/how-to-create-a-context-aware-word-cloud)|[<img src="https://user-images.githubusercontent.com/29990/202506490-7f999d12-8319-4969-b92b-0649559ffbe6.png">](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document)| | ||
| [<img src="https://user-images.githubusercontent.com/29990/202497363-19c30578-8146-4f36-9c4b-4de613610837.png">](https://observablehq.com/@winkjs/how-to-visualize-timeline-of-a-wiki-article)| [<img src="https://user-images.githubusercontent.com/29990/202506181-1a926ee0-788f-4aa1-aeac-a097f09fe747.png">](https://observablehq.com/@winkjs/how-to-create-a-context-aware-word-cloud)|[<img src="https://user-images.githubusercontent.com/29990/202506490-7f999d12-8319-4969-b92b-0649559ffbe6.png">](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document)| | ||
@@ -23,3 +23,3 @@ Head to [live examples](https://winkjs.org/examples.html) to explore further. | ||
## Blazing fast | ||
WinkNLP can easily process large amount of raw text at speeds over <mark>**650,000 tokens/second**</mark> on a M1 Macbook Pro in both browser and Node.js environments. It even runs smoothly on a low-end smartphone's browser. | ||
WinkNLP can easily process large amount of raw text at speeds over **650,000 tokens/second** on a M1 Macbook Pro in both browser and Node.js environments. It even runs smoothly on a low-end smartphone's browser. | ||
@@ -35,12 +35,14 @@ | Environment | Benchmarking Command | | ||
<table> | ||
<tr><td width="330px;">Fast, lossless & multilingual tokenizer ⚡️</td><td>For example, the text string <b><code style="font-size: 0.9em">"¡Hola! नमस्कार! Hi! Bonjour chéri"</code></b> tokenizes as <code style="font-size: 0.9em">["¡", "Hola", "!", "नमस्कार", "!", "Hi", "!", "Bonjour", "chéri"]</code>. It tokenizes text at a speed close to <b>4 million</b> tokens/second on a M1 MBP's browser.</td></tr> | ||
<tr><td>Developer friendly and intuitive <a href="https://winkjs.org/wink-nlp/getting-started.html">API</a> 💚</td><td>As simple as DOM manipulation; most <a href="https://observablehq.com/@winkjs/how-to-build-a-naive-wikification-tool?collection=@winkjs/winknlp-recipes">live examples</a> have <b>30-40</b> lines of code.</td></tr> | ||
<tr><td>Best-in-class <a href="https://winkjs.org/wink-nlp/visualizing-markup.html">text visualization</a> 🖼</td><td>Programmatically <b><a href="https://winkjs.org/wink-nlp/markup.html">mark</a></b> tokens, sentences, entities, etc. using HTML mark or any other tag of your choice.</td></tr> | ||
<tr><td>Extensive text processing features ♻️</td><td>Checkout how a <a href="https://github.com/winkjs/wink-naive-bayes-text-classifier#readme">Naive Bayes classifier</a> achieves <b>impressive</b> chatbot intent classification accuracy with right kind of preprocessing with winkNLP.</td></tr> | ||
<tr><td>Pre-trained <a href="https://winkjs.org/wink-nlp/language-models.html">language models</a> 🔠</td><td>Compact sizes starting from <b><3MB</b>.</td></tr> | ||
<tr><td>Host of <a href="https://winkjs.org/wink-nlp/its-as-helper.html">utilities & tools</a> 💼</td><td>BM25 vectorizer; Several similarity methods – Cosine, Tversky, Sørensen-Dice, Otsuka-Ochiai; Helpers to get bag of words, frequency table, lemma/stem, stop word removal and many more.</td></tr> | ||
<tr><td width="330px;">🐎 Fast, lossless & multilingual tokenizer </td><td>For example, the multilingual text string <b><code style="font-size: 0.9em">"¡Hola! नमस्कार! Hi! Bonjour chéri"</code></b> is tokenized as <code style="font-size: 0.9em">["¡", "Hola", "!", "नमस्कार", "!", "Hi", "!", "Bonjour", "chéri"]</code>. The tokenizer processes text at a speed close to <b>4 million</b> tokens/second on a M1 MBP's browser.</td></tr> | ||
<tr><td>✨ Developer friendly and intuitive <a href="https://winkjs.org/wink-nlp/getting-started.html">API</a></td><td>With winkNLP, process any text using a simple, declarative syntax; most <a href="https://observablehq.com/@winkjs/how-to-build-a-naive-wikification-tool?collection=@winkjs/winknlp-recipes">live examples</a> have <b>30-40</b> lines of code.</td></tr> | ||
<tr><td>🖼 Best-in-class <a href="https://winkjs.org/wink-nlp/visualizing-markup.html">text visualization</a></td><td>Programmatically <b><a href="https://winkjs.org/wink-nlp/markup.html">mark</a></b> tokens, sentences, entities, etc. using HTML mark or any other tag of your choice.</td></tr> | ||
<tr><td>♻️ Extensive text processing features</td><td>Remove and/or retain tokens with specific attributes such as part-of-speech, named entity type, token type, stop word, shape and many more; compute Flesch reading ease score; generate n-grams; normalize, lemmatise or stem. Checkout how with the right kind of text preprocessing, even <a href="https://github.com/winkjs/wink-naive-bayes-text-classifier#readme">Naive Bayes classifier</a> achieves <b>impressive (≥90%)</b> accuracy in sentiment analysis and chatbot intent classification tasks.</td></tr> | ||
<tr><td>🔠 Pre-trained <a href="https://winkjs.org/wink-nlp/language-models.html">language models</a></td><td>Compact sizes starting from <b><3MB</b> – reduced model loading time drastically.</td></tr> | ||
<tr><td>💼 Host of <a href="https://winkjs.org/wink-nlp/its-as-helper.html">utilities & tools</a></td><td>BM25 vectorizer; Several similarity methods – Cosine, Tversky, Sørensen-Dice, Otsuka-Ochiai; Helpers to get bag of words, frequency table, lemma/stem, stop word removal and many more.</td></tr> | ||
</table> | ||
WinkJS also has packages like [Naive Bayes classifier](https://github.com/winkjs/wink-naive-bayes-text-classifier), [multi-class averaged perceptron](https://github.com/winkjs/wink-perceptron) and [popular token and string distance methods](https://github.com/winkjs/wink-distance), which complement winkNLP. | ||
> WinkJS also has packages like [Naive Bayes classifier](https://github.com/winkjs/wink-naive-bayes-text-classifier), [multi-class averaged perceptron](https://github.com/winkjs/wink-perceptron) and [popular token and string distance methods](https://github.com/winkjs/wink-distance), which complement winkNLP. | ||
## Documentation | ||
@@ -131,3 +133,3 @@ - [Concepts](https://winkjs.org/wink-nlp/getting-started.html) — everything you need to know to get started. | ||
### New feature ✨ | ||
### New feature 🌟 | ||
Looking for a new feature, request it via the [new features & ideas](https://github.com/winkjs/wink-nlp/discussions/categories/new-features-ideas) discussion forum or consider becoming a [contributor](https://github.com/winkjs/wink-nlp/blob/master/CONTRIBUTING.md). | ||
@@ -143,2 +145,2 @@ | ||
It is licensed under the terms of the MIT License. | ||
It is licensed under the terms of the MIT License. |
@@ -35,3 +35,4 @@ // wink-nlp | ||
/* eslint-disable guard-for-in */ | ||
var composePatterns = require( './compose-patterns.js' ); | ||
const composePatterns = require( './compose-patterns.js' ); | ||
const identifyMarkedArea = require( './identify-marked-area.js' ); | ||
@@ -176,3 +177,3 @@ const eosTokenN = 2070000; | ||
// execution. Update must happen as a deep copy & not directly! | ||
markedStates[ state ] = [ mark[ 0 ], ( length - mark[ 1 ] - 1 ) ]; | ||
markedStates[ state ] = identifyMarkedArea( mark, length ); | ||
} | ||
@@ -263,3 +264,2 @@ | ||
var mark = markedStates[ m0 ]; | ||
var customProperty = customPropertyAtStates[ m0 ]; | ||
@@ -266,0 +266,0 @@ if ( mark ) { |
@@ -60,8 +60,21 @@ // wink-nlp | ||
/** | ||
* Creates instance of compiler using input arguments. | ||
* @param {JSON} cerModel meta model used during compilation. | ||
* @param {object} cache the wink-nlp cache. | ||
* @param {function} tokenize tokenizer function from wink-nlp. | ||
* @param {boolean} matchValue config.matchValue of learnCustomEntities() API. | ||
* @return {object} contains run function. | ||
* It transforms the input patterns for custom entity recognition into a model, | ||
* which is run by winkNLP's `readDoc()` method. The model is created by | ||
* the `learnCustomEntities()` method of core winkNLP using this compiler. Before | ||
* the compiler can be **run**, its instance must be created using the following | ||
* parameters: | ||
* | ||
* @param {JSON} cerModel precompiled custom entity meta model — handles escaping | ||
* of entity literals. For example `^ADJ` will match | ||
* with token `ADJ` (or `adj` based on `matchValue` in | ||
* `cerConfig`), whereas `ADJ` will match with the | ||
* adjective part-of-speech of a token. | ||
* @param {object} cache of lexicon, which is required to deliver performance. | ||
* @param {function} tokenize is instantiated from core tokenizer, which tokenises the | ||
* input patterns. It is used in the `tokenizeText()` private | ||
* method of compiler. | ||
* @param {boolean} matchValue match value flag — defines match on either `value` or | ||
* `normal` of tokens.<br/> | ||
* @return {object} contains **run** function, which can compile the input | ||
* pattern into a model. | ||
* @private | ||
@@ -76,2 +89,4 @@ */ | ||
cerAutomata.importJSON( cerModel ); | ||
// On pattern detection, we need to save the custom property — `preserve` | ||
// created by the `cerModel's` execution. | ||
cerAutomata.setOnPatternDetectionFn( ( match, customProperty ) => ( match.push( customProperty ) ) ); | ||
@@ -78,0 +93,0 @@ |
@@ -35,10 +35,60 @@ // wink-nlp | ||
/** | ||
* Tests if argument `v` is a JS object. | ||
* | ||
* @param {*} v is tested for a valid JS object. | ||
* @returns {boolean} true if `v` is a valid JS object, otherwise false. | ||
*/ | ||
helper.isObject = function ( v ) { | ||
return ( v && ( Object.prototype.toString.call( v ) === '[object Object]' ) ) ? true : false; // eslint-disable-line no-unneeded-ternary | ||
return ( Object.prototype.toString.call( v ) === '[object Object]' ); | ||
}; // isObject() | ||
/** | ||
* Tests if argument `v` is a JS array. | ||
* | ||
* @param {*} v is tested for a valid JS array. | ||
* @returns {boolean} true if `v` is a valid JS array, otherwise false. | ||
*/ | ||
helper.isArray = function ( v ) { | ||
return ( ( v !== undefined ) && ( v !== null ) && ( Object.prototype.toString.call( v ) === '[object Array]' ) ); | ||
return ( Object.prototype.toString.call( v ) === '[object Array]' ); | ||
}; // isArray() | ||
/**
 * Tests if argument `n` is a finite integer.
 *
 * A value qualifies only when it is of type `number`, is neither `NaN` nor
 * `±Infinity`, and has no fractional part. Values of any other type —
 * including numeric strings such as `'3'` — never qualify.
 *
 * @param {*} n is tested for a finite integer.
 * @returns {boolean} true if `n` is a finite integer, otherwise false.
 */
helper.isFiniteInteger = function ( n ) {
  // `Number.isInteger()` performs no type coercion: it rejects non-numbers,
  // `NaN` and the infinities, and accepts only numbers whose value is
  // integral. This is the complete test that was earlier spelled out by
  // hand with `typeof`, `isNaN()`, `isFinite()` and `Math.round()`.
  return Number.isInteger( n );
}; // isFiniteInteger()
/**
 * Tests if argument `a` is a non-empty array made up entirely of finite
 * integers.
 *
 * @param {*} a is tested for an array of finite integers.
 * @returns {boolean} true if `a` is an array with at least one element and
 * every element is a finite integer, otherwise false.
 */
helper.isIntegerArray = function ( a ) {
  // Anything that is not an array, or is an array without elements, fails.
  if ( !helper.isArray( a ) || a.length === 0 ) return false;
  // Walk through the elements and stop at the first one that is not a finite
  // integer; array iteration yields `undefined` for holes, so they fail too.
  for ( const element of a ) {
    if ( !helper.isFiniteInteger( element ) ) return false;
  }
  // Every element passed the test.
  return true;
}; // isIntegerArray()
module.exports = helper; |
@@ -355,2 +355,10 @@ // wink-nlp | ||
} | ||
// If mark is present then it should be an array of integers **and** its length must | ||
// be equal to 2 **and** start index <= end index. | ||
if ( ( ex.mark !== undefined ) && | ||
( !helper.isIntegerArray( ex.mark ) || | ||
( ex.mark.length !== 2 ) || | ||
( ex.mark.length === 2 && ex.mark[ 0 ] > ex.mark[ 1 ] ) ) ) { | ||
throw Error( `wink-nlp: mark should be an array containing start & end indexes, instead found:\n\n${JSON.stringify( ex.mark, null, 2 )}` ); | ||
} | ||
} else { | ||
@@ -357,0 +365,0 @@ // Example is not an object. |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
584883
61
6076
142