Comparing version 1.11.0 to 1.12.0
@@ -1,1 +0,1 @@ | ||
{"processes":{"bb470559-1230-4db7-9f42-e224006fd2ad":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["bb470559-1230-4db7-9f42-e224006fd2ad"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["bb470559-1230-4db7-9f42-e224006fd2ad"]},"externalIds":{}} | ||
{"processes":{"727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["727a0ee4-8cb0-457f-bc8b-9fa9f09a3d59"]},"externalIds":{}} |
@@ -0,1 +1,9 @@ | ||
# [Some enhancements plus earned OpenSSF best practices passing badge](https://github.com/winkjs/wink-nlp/releases/tag/1.12.0) | ||
## Version 1.12.0 May 13, 2022 | ||
### ✨ Features | ||
- winkNLP earned [Open Source Security Foundation (OpenSSF) Best Practices passing badge](https://bestpractices.coreinfrastructure.org/en/projects/6035). 🎉 👏 🙌 | ||
- `.bowOf()` api of [BM25Vectorizer](https://winkjs.org/wink-nlp/bm25-vectorizer.html) now supports processing of OOV tokens — useful for cosine similarity computation. 😎 | ||
- [Document](https://winkjs.org/wink-nlp/document.html) has a new API — `.pipeConfig()` to inquire about the active processing pipeline. | ||
# [Enhancing custom entities & BM25Vectorizer](https://github.com/winkjs/wink-nlp/releases/tag/1.11.0) | ||
@@ -2,0 +10,0 @@ ## Version 1.11.0 January 30, 2022 |
{ | ||
"name": "wink-nlp", | ||
"version": "1.11.0", | ||
"version": "1.12.0", | ||
"description": "Developer friendly Natural Language Processing ✨", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
# winkNLP | ||
### [![Build Status](https://travis-ci.com/winkjs/wink-nlp.svg?branch=master)](https://travis-ci.com/github/winkjs/wink-nlp) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-nlp/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-nlp?branch=master) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/winkjs/Lobby) [![Follow on Twitter](https://img.shields.io/twitter/follow/winkjs_org?style=social)](https://twitter.com/winkjs_org) | ||
### [![Build Status](https://travis-ci.com/winkjs/wink-nlp.svg?branch=master)](https://travis-ci.com/github/winkjs/wink-nlp) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-nlp/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-nlp?branch=master) [![Known Vulnerabilities](https://snyk.io/test/github/winkjs/wink-nlp/badge.svg)](https://snyk.io/test/github/winkjs/wink-nlp) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/6035/badge)](https://bestpractices.coreinfrastructure.org/projects/6035) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/winkjs/Lobby) [![Follow on Twitter](https://img.shields.io/twitter/follow/winkjs_org?style=social)](https://twitter.com/winkjs_org) | ||
@@ -139,3 +139,3 @@ ## Developer friendly Natural Language Processing ✨ | ||
### Usage query 👩🏽💻 | ||
Please ask at [Stack Overflow](https://stackoverflow.com/) or discuss it at [Wink JS Gitter Lobby](https://gitter.im/winkjs/Lobby). | ||
Please ask at [Stack Overflow](https://stackoverflow.com/) or discuss at [Wink JS GitHub Discussions](https://github.com/winkjs/wink-nlp/discussions) or chat with us at [Wink JS Gitter Lobby](https://gitter.im/winkjs/Lobby). | ||
@@ -146,3 +146,3 @@ ### Bug report 🐛 | ||
### New feature ✨ | ||
Looking for a new feature, request it via a new [issue](https://github.com/winkjs/wink-nlp/issues) or consider becoming a [contributor](https://github.com/winkjs/wink-nlp/blob/master/CONTRIBUTING.md). | ||
Looking for a new feature, request it via the [new features & ideas](https://github.com/winkjs/wink-nlp/discussions/categories/new-features-ideas) discussion forum or consider becoming a [contributor](https://github.com/winkjs/wink-nlp/blob/master/CONTRIBUTING.md). | ||
@@ -149,0 +149,0 @@ |
@@ -459,2 +459,5 @@ // wink-nlp | ||
// Ensure that we make a deep copy of config before returning to avoid corruption! | ||
methods.pipeConfig = () => JSON.parse( JSON.stringify( docData.currPipe ) ); | ||
return methods; | ||
@@ -461,0 +464,0 @@ }; |
@@ -112,10 +112,5 @@ // wink-nlp | ||
// Annotation stuff. | ||
// Contains a list of valid annotations built from `theModel`. | ||
var validAnnotations = Object.create( null ); | ||
validAnnotations.sbd = true; | ||
validAnnotations.negation = true; | ||
validAnnotations.sentiment = true; | ||
validAnnotations.pos = true; | ||
validAnnotations.ner = true; | ||
validAnnotations.cer = true; | ||
// Current pipe. | ||
@@ -235,2 +230,5 @@ var currPipe = Object.create( null ); | ||
rdd.markings = []; | ||
// Publish the current annotation pipeline so that code can inquire about | ||
// active annotations! | ||
rdd.currPipe = currPipe; | ||
@@ -400,2 +398,11 @@ var wrappedDocData = DocDataWrapper( rdd ); // eslint-disable-line new-cap | ||
// Build a list of valid annotations from `theModel`. This will ensure that | ||
// only **available** annotations from the model can be used in the pipe. | ||
validAnnotations.sbd = typeof theModel.sbd === 'function'; | ||
validAnnotations.negation = typeof theModel.negation === 'function'; | ||
validAnnotations.sentiment = typeof theModel.sa === 'function'; | ||
validAnnotations.pos = typeof theModel.pos === 'function'; | ||
validAnnotations.ner = typeof theModel.ner === 'function'; | ||
validAnnotations.cer = typeof theModel.metaCER === 'function'; | ||
const tempPipe = ( pipe === undefined ) ? Object.keys( validAnnotations ) : pipe; | ||
@@ -402,0 +409,0 @@ if ( helper.isArray( tempPipe ) ) { |
@@ -312,7 +312,9 @@ // wink-nlp | ||
* Computes the bag-of-words (bowOf) of the input document, using the tf-idf | ||
* learned so far. | ||
* @param {string[]} tokens tokenized text, usually obtained via winkNLP. | ||
* @return {object} its bow. | ||
* learned so far. If `processOOV` is true then each OOV token's frequency is | ||
* computed and its `idf` is assumed to be **1**; otherwise all OOVs are ignored. | ||
* @param {string[]} tokens tokenized text, usually obtained via winkNLP. | ||
* @param {boolean} processOOV true — process OOV, false — ignore OOV (default). | ||
* @return {object} its bow. | ||
*/ | ||
methods.bowOf = function ( tokens ) { | ||
methods.bowOf = function ( tokens, processOOV = false ) { | ||
computeWeights(); | ||
@@ -323,10 +325,21 @@ const bow = Object.create( null ); | ||
if ( typeof processOOV !== 'boolean' ) { | ||
throw Error( 'wink-nlp: processOOV must be a boolean.' ); | ||
} | ||
for ( let i = 0; i < tokens.length; i += 1 ) { | ||
const t = tokens[ i ]; | ||
// bow applies only if the token is not an unseen one! | ||
if ( idf[ t ] ) bow[ t ] = 1 + ( bow[ t ] || 0 ); | ||
// `processOOV` true means count every term otherwise count only if it is | ||
// in the vocabulary i.e. `idf`. | ||
if ( processOOV ) { | ||
bow[ t ] = 1 + ( bow[ t ] || 0 ); | ||
} else if ( idf[ t ] ) bow[ t ] = 1 + ( bow[ t ] || 0 ); | ||
} | ||
for ( const t in bow ) { // eslint-disable-line guard-for-in | ||
bow[ t ] = idf[ t ] * ( ( k1 + 1 ) * bow[ t ] ) / ( ( k1 * ( 1 - b + ( b * ( tokens.length / avgDL ) ) ) ) + bow[ t ] ); | ||
// `bow` tokens are determined by `processOOV` i.e. if true it will contain | ||
// OOVs also otherwise it will not have any OOV. On the other hand `idf` | ||
// always contains all the seen tokens. Therefore when `processOOV` is true, | ||
// the `idf[ t ]` for all OOV will be taken as **1** (highest possible value). | ||
bow[ t ] = ( idf[ t ] || 1 ) * ( ( k1 + 1 ) * bow[ t ] ) / ( ( k1 * ( 1 - b + ( b * ( tokens.length / avgDL ) ) ) ) + bow[ t ] ); | ||
thisNorm += normFn[ norm ]( bow[ t ] ); | ||
@@ -333,0 +346,0 @@ } |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
563164
59
5943