wink-nlp
Advanced tools
Comparing version 1.1.0 to 1.2.0
@@ -1,1 +0,1 @@ | ||
{"processes":{"fcae40f5-9afa-450a-8030-bc7e2e4217a6":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-markings.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/
winkjs/wink-nlp/src/api/col-tokens-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["fcae40f5-9afa-450a-8030-bc7e2e4217a6"]},"externalIds":{}} | ||
{"processes":{"088da41f-7e67-418e-8c65-0a782998d94f":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["088da41f-7e67-418e-8c65-0a782998d94f"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["088da41f-7e67-418e-8c65-0a782998d94f"]},"externalIds":{}} |
@@ -0,1 +1,12 @@ | ||
# [Introducing support for browser ready language model](https://github.com/winkjs/wink-nlp/releases/tag/1.2.0) | ||
## Version 1.2.0 December 24, 2020 | ||
### ✨ Features | ||
- We have added support for browser ready language model. π€© π | ||
- Now easily vectorize text using bm25-based vectorizer. π€ π | ||
# | ||
### βοΈ Updates | ||
- Examples in README now run on [RunKit](https://npm.runkit.com/wink-nlp) using web model! β | ||
# [Enabling add-ons to support new language model ](https://github.com/winkjs/wink-nlp/releases/tag/1.1.0) | ||
@@ -2,0 +13,0 @@ ## Version 1.1.0 September 18, 2020 |
{ | ||
"name": "wink-nlp", | ||
"version": "1.1.0", | ||
"version": "1.2.0", | ||
"description": "Developer friendly NLP ✨", | ||
@@ -20,2 +20,4 @@ "keywords": [ | ||
"stemmer", | ||
"bm25", | ||
"vectorizer", | ||
"winkNLP", | ||
@@ -48,7 +50,8 @@ "wink" | ||
"docker": "^1.0.0", | ||
"eslint": "^7.5.0", | ||
"eslint": "^7.14.0", | ||
"nyc": "^15.1.0", | ||
"mocha": "^8.0.1" | ||
"mocha": "^8.2.1" | ||
}, | ||
"runkitExampleFilename": "./runkit-example.js", | ||
"dependencies": {} | ||
} |
# winkNLP | ||
### [![Build Status](https://api.travis-ci.org/winkjs/wink-nlp.svg?branch=master)](https://travis-ci.org/winkjs/wink-nlp) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-nlp/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-nlp?branch=master) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/winkjs/Lobby) | ||
### [![Build Status](https://api.travis-ci.org/winkjs/wink-nlp.svg?branch=master)](https://travis-ci.org/winkjs/wink-nlp) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-nlp/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-nlp?branch=master) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/winkjs/Lobby) [![Follow on Twitter](https://img.shields.io/twitter/follow/winkjs_org?style=social)](https://twitter.com/winkjs_org) | ||
@@ -10,2 +10,5 @@ ## Developer friendly NLP ✨ | ||
[<img src="https://user-images.githubusercontent.com/9491/100614781-ad17bb00-333c-11eb-87ab-2ae41aa21285.png" alt="Wink Wizard Showcase">](https://winkjs.org/showcase-wiz/) | ||
## Features | ||
@@ -15,12 +18,24 @@ It packs a rich feature set into a small foot print codebase of [under 1500 lines](https://coveralls.io/github/winkjs/wink-nlp?branch=master): | ||
1. Lossless & multilingual tokenizer | ||
2. Developer friendly and intuitive API | ||
3. Built-in API to aid text visualization | ||
4. Easy information extraction from raw text | ||
5. Extensive text pre-processing features | ||
6. Pre-trained models with sizes starting from <3MB onwards | ||
7. Word vector integration | ||
8. Comprehensive NLP pipeline covering tokenization, sentence boundary detection, negation handling, sentiment analysis, part-of-speech (pos) tagging, named entity extraction, custom entities detection and pattern matching | ||
9. No external dependencies. | ||
7. BM25-based vectorizer | ||
8. Word vector integration | ||
9. Comprehensive NLP pipeline covering tokenization, sentence boundary detection, negation handling, sentiment analysis, part-of-speech (pos) tagging, named entity extraction, custom entities detection and pattern matching | ||
10. No external dependencies. | ||
11. Runs on web browsers | ||
## Installation | ||
@@ -44,2 +59,5 @@ | ||
#### How to install for Web Browser | ||
If you’re using winkNLP in the browser use the [wink-eng-lite-web-model](https://www.npmjs.com/package/wink-eng-lite-web-model) instead. Learn about its installation and usage in our [guide to using winkNLP in the browser](https://winkjs.org/wink-nlp/how-to-run-wink-nlp-in-browser.html). | ||
## Getting Started | ||
@@ -80,2 +98,4 @@ The "Hello World!" in winkNLP is given below. As the next step, we recommend a dive into [winkNLP's concepts](https://winkjs.org/wink-nlp/getting-started.html). | ||
> Try a sample code at [RunKit](https://npm.runkit.com/wink-nlp) or head to [showcases](https://winkjs.org/showcase.html) for live examples. | ||
## Speed & Accuracy | ||
@@ -97,2 +117,3 @@ The [winkNLP](https://winkjs.org/wink-nlp/) processes raw text at **~525,000 tokens per second** with its default language model — [wink-eng-lite-model](https://github.com/winkjs/wink-eng-lite-model), when [benchmarked](https://github.com/bestiejs/benchmark.js) using "Ch 13 of Ulysses by James Joyce" on a 2.2 GHz Intel Core i7 machine with 16GB RAM. The processing included the entire NLP pipeline — tokenization, sentence boundary detection, negation handling, sentiment analysis, part-of-speech tagging, and named entity extraction. This speed is way ahead of the prevailing speed benchmarks. | ||
- [Change log](https://github.com/winkjs/wink-nlp/blob/master/CHANGELOG.md) — version history along with the details of breaking changes, if any. | ||
- [Showcases](https://winkjs.org/showcase.html) — live examples with code to give you a head start. | ||
@@ -99,0 +120,0 @@ ## Need Help? |
@@ -33,2 +33,3 @@ // wink-nlp | ||
var sort4FT = require( './sort4FT.js' ); | ||
var containedMarkings = require( './contained-markings.js' ); | ||
@@ -92,3 +93,3 @@ var as = Object.create( null ); | ||
return table.sort( ( a, b ) => ( b[ 1 ] - a[ 1 ] ) ); | ||
return table.sort( sort4FT ); | ||
}; // freqTable() | ||
@@ -95,0 +96,0 @@ |
@@ -33,2 +33,3 @@ // wink-nlp | ||
var sort4FT = require( './sort4FT.js' ); | ||
var constants = require( './constants.js' ); | ||
@@ -142,3 +143,49 @@ var caseMap = [ 'other', 'lowerCase', 'upperCase', 'titleCase' ]; | ||
/* ------ utilities ------ */ | ||
/**
 * Hands back the `terms` argument exactly as received (same reference, no copy).
 *
 * @param {*} tf not read by this function.
 * @param {*} idf not read by this function.
 * @param {*} terms value to return.
 * @return {*} the `terms` argument.
 */
its.terms = ( tf, idf, terms ) => terms; // terms()
/**
 * Builds a matrix with one row per entry of `tf` and one column per entry
 * of `terms`. Cell `[ r ][ c ]` is `tf[ r ][ terms[ c ] ]`, or `0` when that
 * lookup is falsy (e.g. the term is absent from that entry).
 *
 * @param {object[]} tf indexable collection; each entry is looked up by term.
 * @param {*} idf not read by this function.
 * @param {string[]} terms column order for every row.
 * @return {Array[]} freshly allocated array of rows.
 */
its.docTermMatrix = function ( tf, idf, terms ) {
  return Array.from( tf, ( docTf ) => (
    // A missing/zero entry is normalised to the number 0.
    Array.from( terms, ( term ) => docTf[ term ] || 0 )
  ) );
}; // docTermMatrix()
/**
 * Hands back the `tf` argument exactly as received (same reference, no copy).
 *
 * @param {*} tf value to return.
 * @return {*} the `tf` argument.
 */
its.docBOWArray = ( tf ) => tf; // docBOWArray()
/**
 * Hands back the `tf` argument exactly as received (same reference, no copy).
 *
 * @param {*} tf value to return.
 * @return {*} the `tf` argument.
 */
its.bow = ( tf ) => tf; // bow()
/**
 * Converts the `idf` object into an array of `[ key, value ]` pairs and
 * sorts that array with `sort4FT`.
 *
 * @param {*} tf not read by this function.
 * @param {object} idf object whose enumerable keys are collected; the loop is
 * an unguarded `for...in`, so inherited enumerable keys are included too.
 * @return {Array[]} the sorted array of `[ key, value ]` pairs.
 */
its.idf = function ( tf, idf ) {
  const pairs = [];
  for ( const term in idf ) { // eslint-disable-line guard-for-in
    const weight = idf[ term ];
    pairs.push( [ term, weight ] );
  }
  // Ordering is delegated entirely to sort4FT (required from ./sort4FT.js);
  // NOTE(review): presumably value first, then term — confirm against that module.
  pairs.sort( sort4FT );
  return pairs;
}; // idf()
/**
 * Converts the `tf` object into an array of `[ key, value ]` pairs and
 * sorts that array with `sort4FT`.
 *
 * @param {object} tf object whose enumerable keys are collected; the loop is
 * an unguarded `for...in`, so inherited enumerable keys are included too.
 * @return {Array[]} the sorted array of `[ key, value ]` pairs.
 */
its.tf = function ( tf ) {
  const pairs = [];
  for ( const term in tf ) { // eslint-disable-line guard-for-in
    const count = tf[ term ];
    pairs.push( [ term, count ] );
  }
  // Ordering is delegated entirely to sort4FT (required from ./sort4FT.js);
  // NOTE(review): presumably frequency first, then term — confirm against that module.
  pairs.sort( sort4FT );
  return pairs;
}; // tf()
/**
 * Serialises the two arguments into one JSON string of the form
 * `{"tf":…,"idf":…}` (keys in that order).
 *
 * @param {*} tf stored under the `tf` key.
 * @param {*} idf stored under the `idf` key.
 * @return {string} result of `JSON.stringify` on the combined object.
 */
its.modelJSON = function ( tf, idf ) {
  const model = { tf, idf };
  return JSON.stringify( model );
}; // modelJSON()
module.exports = its; |
490303
54
5130
135