wink-nlp
Advanced tools
Comparing version 1.12.2 to 1.12.3
@@ -1,1 +0,1 @@ | ||
{"processes":{"f1973c1f-52a2-4843-8013-c62d6114130a":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["f1973c1f-52a2-4843-8013-c62d6114130a"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["f1973c1f-52a2-4843-8013-c62d6114130a"]},"externalIds":{}} | ||
{"processes":{"ecaabe35-e24e-4d7b-931f-1f56d35101ff":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["ecaabe35-e24e-4d7b-931f-1f56d35101ff"]},"externalIds":{}} |
@@ -0,1 +1,9 @@ | ||
# [Operational update](https://github.com/winkjs/wink-nlp/releases/tag/1.12.3) | ||
## Version 1.12.3 November 18, 2022 | ||
### ⚙️ Updates | ||
- README is now more informative and links to examples and benchmarks 👍 | ||
- Benchmarked on latest machine, browser versions 🖥 | ||
# [Ready for Node.js version 18](https://github.com/winkjs/wink-nlp/releases/tag/1.12.2) | ||
@@ -2,0 +10,0 @@ ## Version 1.12.2 October 13, 2022 |
{ | ||
"name": "wink-nlp", | ||
"version": "1.12.2", | ||
"version": "1.12.3", | ||
"description": "Developer friendly Natural Language Processing ✨", | ||
@@ -49,8 +49,8 @@ "keywords": [ | ||
"benchmark": "^2.1.4", | ||
"chai": "^4.3.4", | ||
"chai": "^4.3.6", | ||
"coveralls": "^3.1.1", | ||
"docker": "^1.0.0", | ||
"dtslint": "^4.1.3", | ||
"eslint": "^7.32.0", | ||
"mocha": "^9.0.3", | ||
"dtslint": "^4.2.1", | ||
"eslint": "^8.25.0", | ||
"mocha": "^10.0.0", | ||
"nyc": "^15.1.0" | ||
@@ -57,0 +57,0 @@ }, |
125
README.md
@@ -8,40 +8,43 @@ # winkNLP | ||
winkNLP is a JavaScript library for Natural Language Processing (NLP). Designed specifically to make development of NLP solutions **easier** and **faster**, winkNLP is optimized for the right balance of performance and accuracy. The package can handle large amount of raw text at speeds over **525,000 tokens/second**. And with a test coverage of ~100%, winkNLP is a tool for building production grade systems with confidence. | ||
WinkNLP is a JavaScript library for Natural Language Processing (NLP). Designed specifically to make development of NLP applications **easier** and **faster**, winkNLP is optimized for the right balance of performance and accuracy. | ||
[<img src="https://user-images.githubusercontent.com/9491/100614781-ad17bb00-333c-11eb-87ab-2ae41aa21285.png" alt="Wink Wizard Showcase">](https://winkjs.org/showcase-wiz/) | ||
It is built ground up with a lean code base that has [no external dependency](https://snyk.io/test/github/winkjs/wink-nlp?tab=dependencies). A test coverage of [~100%](https://coveralls.io/github/winkjs/wink-nlp?branch=master) and compliance with the [Open Source Security Foundation best practices](https://bestpractices.coreinfrastructure.org/en/projects/6035) make winkNLP the ideal tool for building production grade systems with confidence. | ||
WinkNLP, with full [TypeScript support](https://github.com/winkjs/wink-nlp/blob/master/types/index.d.ts), runs on Node.js and browsers. | ||
## Features | ||
WinkNLP has a comprehensive natural language processing (NLP) pipeline covering tokenization, sentence boundary detection (sbd), negation handling, sentiment analysis, part-of-speech (pos) tagging, named entity recognition (ner), custom entities recognition (cer): | ||
## Build amazing apps quickly | ||
| [Wikipedia article timeline](https://winkjs.org/showcase-timeline/) | [Context aware word cloud](https://observablehq.com/@winkjs/how-to-create-a-context-aware-word-cloud) | [Key sentences detection](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document) | | ||
| --- | --- | --- | | ||
| [<img src="https://user-images.githubusercontent.com/29990/202497363-19c30578-8146-4f36-9c4b-4de613610837.png">](https://winkjs.org/showcase-timeline/)| [<img src="https://user-images.githubusercontent.com/29990/202506181-1a926ee0-788f-4aa1-aeac-a097f09fe747.png">](https://observablehq.com/@winkjs/how-to-create-a-context-aware-word-cloud)|[<img src="https://user-images.githubusercontent.com/29990/202506490-7f999d12-8319-4969-b92b-0649559ffbe6.png">](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document)| | ||
<img src="https://winkjs.org/images/wink-nlp-processing-pipeline.png" alt="Processing pipeline: text, tokenization, SBD, negation, sentiment, NER, POS, CER" title="WinkNLP processing pipeline"> | ||
Head to [live examples](https://winkjs.org/examples.html) to explore further. | ||
At every stage a range of properties become accessible for tokens, sentences, and entities. Read more about the processing pipeline and how to configure it in the [winkNLP documentation](https://winkjs.org/wink-nlp/processing-pipeline.html). | ||
## Blazing fast | ||
WinkNLP can easily process large amounts of raw text at speeds over <mark>**650,000 tokens/second**</mark> on an M1 MacBook Pro in both browser and Node.js environments. It even runs smoothly on a low-end smartphone's browser. | ||
| Environment | Benchmarking Command | | ||
|--- | --- | | ||
| Node.js | [node benchmark/run](https://github.com/winkjs/wink-nlp/tree/master/benchmark) | | ||
| Browser | [How to measure winkNLP's speed on browsers?](https://observablehq.com/@winkjs/how-to-measure-winknlps-speed-on-browsers) | | ||
It packs a rich feature set into a small-footprint codebase of [under 1500 lines](https://coveralls.io/github/winkjs/wink-nlp?branch=master): | ||
## Features | ||
WinkNLP has a [comprehensive natural language processing (NLP) pipeline](https://winkjs.org/wink-nlp/processing-pipeline.html) covering tokenization, sentence boundary detection (sbd), negation handling, sentiment analysis, part-of-speech (pos) tagging, named entity recognition (ner), custom entities recognition (cer). It offers a rich feature set: | ||
1. Fast, lossless & multilingual [tokenizer](https://winkjs.org/wink-nlp/processing-pipeline.html) | ||
<table> | ||
<tr><td width="330px;">Fast, lossless & multilingual tokenizer ⚡️</td><td>For example, the text string <b><code style="font-size: 0.9em">"¡Hola! नमस्कार! Hi! Bonjour chéri"</code></b> tokenizes as <code style="font-size: 0.9em">["¡", "Hola", "!", "नमस्कार", "!", "Hi", "!", "Bonjour", "chéri"]</code>. It tokenizes text at a speed close to <b>4 million</b> tokens/second on an M1 MBP's browser.</td></tr> | ||
<tr><td>Developer friendly and intuitive <a href="https://winkjs.org/wink-nlp/getting-started.html">API</a> 💚</td><td>As simple as DOM manipulation; most <a href="https://observablehq.com/@winkjs/how-to-build-a-naive-wikification-tool?collection=@winkjs/winknlp-recipes">live examples</a> have <b>30-40</b> lines of code.</td></tr> | ||
<tr><td>Best-in-class <a href="https://winkjs.org/wink-nlp/visualizing-markup.html">text visualization</a> 🖼</td><td>Programmatically <b><a href="https://winkjs.org/wink-nlp/markup.html">mark</a></b> tokens, sentences, entities, etc. using HTML mark or any other tag of your choice.</td></tr> | ||
<tr><td>Extensive text processing features ♻️</td><td>Check out how a <a href="https://github.com/winkjs/wink-naive-bayes-text-classifier#readme">Naive Bayes classifier</a> achieves <b>impressive</b> chatbot intent classification accuracy with the right kind of preprocessing with winkNLP.</td></tr> | ||
<tr><td>Pre-trained <a href="https://winkjs.org/wink-nlp/language-models.html">language models</a> 🔠</td><td>Compact sizes starting from <b><3MB</b>.</td></tr> | ||
<tr><td>Host of <a href="https://winkjs.org/wink-nlp/its-as-helper.html">utilities & tools</a> 💼</td><td>BM25 vectorizer; Several similarity methods – Cosine, Tversky, Sørensen-Dice, Otsuka-Ochiai; Helpers to get bag of words, frequency table, lemma/stem, stop word removal and many more.</td></tr> | ||
</table> | ||
2. Developer friendly and intuitive [API](https://winkjs.org/wink-nlp/getting-started.html) | ||
WinkJS also has packages like [Naive Bayes classifier](https://github.com/winkjs/wink-naive-bayes-text-classifier), [multi-class averaged perceptron](https://github.com/winkjs/wink-perceptron) and [popular token and string distance methods](https://github.com/winkjs/wink-distance), which complement winkNLP. | ||
3. Built-in [API](https://winkjs.org/wink-nlp/visualizing-markup.html) to aid [text visualization](https://observablehq.com/@winkjs/how-to-perform-sentiment-analysis?collection=@winkjs/winknlp-recipes) | ||
## Documentation | ||
- [Concepts](https://winkjs.org/wink-nlp/getting-started.html) — everything you need to know to get started. | ||
- [API Reference](https://winkjs.org/wink-nlp/read-doc.html) — explains usage of APIs with examples. | ||
- [Change log](https://github.com/winkjs/wink-nlp/blob/master/CHANGELOG.md) — version history along with the details of breaking changes, if any. | ||
- [Examples](https://winkjs.org/examples.html) — live examples with code to give you a head start. | ||
4. Extensive [text processing features](https://winkjs.org/wink-nlp/its-as-helper.html) such as bag-of-words, frequency table, stop word removal, readability statistics computation and many more. | ||
5. Pre-trained [language models](https://winkjs.org/wink-nlp/language-models.html) with sizes starting from <3MB onwards | ||
6. [BM25-based vectorizer](https://winkjs.org/wink-nlp/bm25-vectorizer.html) | ||
7. Multiple [similarity](https://winkjs.org/wink-nlp/similarity.html) methods | ||
8. Word vector integration | ||
9. No external dependencies | ||
10. [Runs on web browsers](https://winkjs.org/wink-nlp/wink-nlp-in-browsers.html) | ||
11. [Typescript support](https://github.com/winkjs/wink-nlp/blob/master/types/index.d.ts). | ||
## Installation | ||
@@ -55,3 +58,3 @@ | ||
In order to use winkNLP after its installation, you also need to install a language model according to the node version used. The following table outlines the version specific installation command: | ||
In order to use winkNLP after its installation, you also need to install a language model according to the node version used. The table below outlines the version specific installation command: | ||
@@ -63,3 +66,3 @@ | Node.js Version |Installation | | ||
The [wink-eng-lite-web-model](https://github.com/winkjs/wink-eng-lite-web-model) is designed to work with Node.js version 16 or 18. It can also work on browsers as described in the next section. | ||
The [wink-eng-lite-web-model](https://github.com/winkjs/wink-eng-lite-web-model) is designed to work with Node.js version 16 or 18. It can also work on browsers as described in the next section. This is the **recommended** model. | ||
@@ -71,60 +74,45 @@ The second command installs the [wink-eng-lite-model](https://github.com/winkjs/wink-eng-lite-model), which works with Node.js version 14 or 12. | ||
## Getting Started | ||
The "Hello World!" in winkNLP is given below: | ||
### Get started | ||
Here is the "Hello World!" of winkNLP: | ||
```javascript | ||
// Load wink-nlp package & helpers. | ||
// Load wink-nlp package. | ||
const winkNLP = require( 'wink-nlp' ); | ||
// Load "its" helper to extract item properties. | ||
const its = require( 'wink-nlp/src/its.js' ); | ||
// Load "as" reducer helper to reduce a collection. | ||
const as = require( 'wink-nlp/src/as.js' ); | ||
// Load english language model — light version. | ||
const model = require( 'wink-eng-lite-model' ); | ||
// Load english language model. | ||
const model = require( 'wink-eng-lite-web-model' ); | ||
// Instantiate winkNLP. | ||
const nlp = winkNLP( model ); | ||
// Obtain "its" helper to extract item properties. | ||
const its = nlp.its; | ||
// Obtain "as" reducer helper to reduce a collection. | ||
const as = nlp.as; | ||
// NLP Code. | ||
const text = 'Hello World🌎! How are you?'; | ||
const doc = nlp.readDoc( text ); | ||
console.log( doc.out() ); | ||
// -> Hello World🌎! How are you? | ||
console.log( doc.sentences().out() ); | ||
// -> [ 'Hello World🌎!', 'How are you?' ] | ||
console.log( doc.entities().out( its.detail ) ); | ||
// -> [ { value: '🌎', type: 'EMOJI' } ] | ||
console.log( doc.tokens().out() ); | ||
// -> [ 'Hello', 'World', '🌎', '!', 'How', 'are', 'you', '?' ] | ||
console.log( doc.tokens().out( its.type, as.freqTable ) ); | ||
// -> [ [ 'word', 5 ], [ 'punctuation', 2 ], [ 'emoji', 1 ] ] | ||
``` | ||
Experiment with the above code on [RunKit](https://npm.runkit.com/wink-nlp). | ||
Experiment with winkNLP on [RunKit](https://npm.runkit.com/wink-nlp). | ||
### Explore Further | ||
Dive into [winkNLP's concepts](https://winkjs.org/wink-nlp/getting-started.html) or head to **[winkNLP recipes](https://observablehq.com/collection/@winkjs/winknlp-recipes)** for common NLP tasks or just explore live [showcases](https://winkjs.org/showcase.html) to learn: | ||
#### [Wikipedia Timeline](https://winkjs.org/showcase-timeline/) ⏳ | ||
Reads any wikipedia article and generates a visual timeline of all its events. | ||
#### [NLP Wizard](https://winkjs.org/showcase-wiz/) 🧙 | ||
Performs tokenization, sentence boundary detection, pos tagging, named entity detection and sentiment analysis of user input text in real time. | ||
#### [Naive Wikification Tool](https://observablehq.com/@winkjs/how-to-build-a-naive-wikification-tool) 🔗 | ||
Links entities such as famous persons, locations or objects to the relevant Wikipedia pages. | ||
## Speed & Accuracy | ||
The [winkNLP](https://winkjs.org/wink-nlp/) processes raw text at **~525,000 tokens per second** with its default language model — [wink-eng-lite-model](https://github.com/winkjs/wink-eng-lite-model), when [benchmarked](https://github.com/bestiejs/benchmark.js) using "Ch 13 of Ulysses by James Joyce" on a 2.2 GHz Intel Core i7 machine with 16GB RAM. The processing included the entire NLP pipeline — tokenization, sentence boundary detection, negation handling, sentiment analysis, part-of-speech tagging, and named entity extraction. This speed is way ahead of the prevailing speed benchmarks. | ||
The [winkNLP](https://winkjs.org/wink-nlp/) processes raw text at **~650,000 tokens per second** with its [wink-eng-lite-web-model](https://github.com/winkjs/wink-eng-lite-web-model), when [benchmarked](https://github.com/bestiejs/benchmark.js) using "Ch 13 of Ulysses by James Joyce" on an M1 MacBook Pro machine with 16GB RAM. The processing included the entire NLP pipeline — tokenization, sentence boundary detection, negation handling, sentiment analysis, part-of-speech tagging, and named entity extraction. This speed is way ahead of the prevailing speed benchmarks. | ||
The benchmark was conducted on [Node.js versions 14.8.0, and 12.18.3](https://nodejs.org/en/about/releases/). It delivered similar/better performance on Node.js versions 16/18. | ||
The benchmark was conducted on [Node.js versions 16 and 18](https://nodejs.org/en/about/releases/). | ||
The [winkNLP](https://winkjs.org/wink-nlp/) delivers similar performance on browsers; its performance on a specific machine/browser combination can be measured using the Observable notebook — [How to measure winkNLP's speed on browsers?](https://observablehq.com/@winkjs/how-to-measure-winknlps-speed-on-browsers?collection=@winkjs/winknlp-recipes). | ||
It pos tags a subset of WSJ corpus with an accuracy of **~94.7%** — this includes *tokenization of raw text prior to pos tagging*. The present state-of-the-art is at ~97% accuracy but at lower speeds and is generally computed using gold standard pre-tokenized corpus. | ||
It POS-tags a subset of the WSJ corpus with an accuracy of **~94.7%** — this includes *tokenization of raw text prior to pos tagging*. The current state-of-the-art is at ~97% accuracy but at lower speeds and is generally computed using a gold standard pre-tokenized corpus. | ||
Its general-purpose sentiment analysis delivers an [f-score](https://en.wikipedia.org/wiki/F1_score) of **~84.5%**, when validated using Amazon Product Review [Sentiment Labelled Sentences Data Set](https://archive.ics.uci.edu/ml/machine-learning-databases/00331/) at [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/index.php). The current benchmark accuracy for **specifically trained** models can range around 95%. | ||
@@ -135,7 +123,2 @@ | ||
## Documentation | ||
- [Concepts](https://winkjs.org/wink-nlp/getting-started.html) — everything you need to know to get started. | ||
- [API Reference](https://winkjs.org/wink-nlp/read-doc.html) — explains usage of APIs with examples. | ||
- [Change log](https://github.com/winkjs/wink-nlp/blob/master/CHANGELOG.md) — version history along with the details of breaking changes, if any. | ||
- [Showcases](https://winkjs.org/showcase.html) — live examples with code to give you a head start. | ||
@@ -154,4 +137,4 @@ ## Need Help? | ||
## About wink | ||
[Wink](https://winkjs.org/) is a family of open source packages for **Natural Language Processing**, **Machine Learning**, and **Statistical Analysis** in NodeJS. The code is **thoroughly documented** for easy human comprehension and has a **test coverage of ~100%** for reliability to build production grade solutions. | ||
## About winkJS | ||
[WinkJS](https://winkjs.org/) is a family of open source packages for **Natural Language Processing**, **Machine Learning**, and **Statistical Analysis** in NodeJS. The code is **thoroughly documented** for easy human comprehension and has a **test coverage of ~100%** for reliability to build production grade solutions. | ||
@@ -162,2 +145,2 @@ ## Copyright & License | ||
It is licensed under the terms of the MIT License. | ||
It is licensed under the terms of the MIT License. |
@@ -0,20 +1,29 @@ | ||
// Load wink-nlp package. | ||
const winkNLP = require( 'wink-nlp' ); | ||
const its = require( 'wink-nlp/src/its.js' ); | ||
// Use web model for RunKit. | ||
// Load english language model — light version. | ||
const model = require( 'wink-eng-lite-web-model' ); | ||
// Instantiate winkNLP. | ||
const nlp = winkNLP( model ); | ||
// Obtain "its" helper to extract item properties. | ||
const its = nlp.its; | ||
// Obtain "as" reducer helper to reduce a collection. | ||
const as = nlp.as; | ||
const text = 'Its quarterly profits jumped 76% to $1.13 billion for the three months to December, from $639million of previous year.'; | ||
// NLP Code. | ||
const text = 'Hello World🌎! How are you?'; | ||
const doc = nlp.readDoc( text ); | ||
// Print tokens. | ||
console.log( doc.out() ); | ||
// -> Hello World🌎! How are you? | ||
console.log( doc.sentences().out() ); | ||
// -> [ 'Hello World🌎!', 'How are you?' ] | ||
console.log( doc.entities().out( its.detail ) ); | ||
// -> [ { value: '🌎', type: 'EMOJI' } ] | ||
console.log( doc.tokens().out() ); | ||
// Print each token's type. | ||
console.log( doc.tokens().out( its.type ) ); | ||
// Print details of each entity. | ||
console.log( doc.entities().out( its.detail ) ); | ||
// Markup entities along with their type for highlighting them in the text. | ||
doc.entities().each( ( e ) => { | ||
e.markup( '<mark>', `<sub style="font-weight:900"> ${e.out(its.type)}</sub></mark>` ); | ||
} ); | ||
// Render them as HTML via RunKit | ||
doc.out( its.markedUpText ); | ||
// -> [ 'Hello', 'World', '🌎', '!', 'How', 'are', 'you', '?' ] | ||
console.log( doc.tokens().out( its.type, as.freqTable ) ); | ||
// -> [ [ 'word', 5 ], [ 'punctuation', 2 ], [ 'emoji', 1 ] ] |
572692
5947
139