Comparing version 1.9.0 to 1.10.0
@@ -1,1 +0,1 @@ | ||
{"processes":{"f2a3a9a2-3ac8-4f39-9736-44049045f081":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["f2a3a9a2-3ac8-4f39-9736-44049045f081"]},"externalIds":{}} | ||
{"processes":{"bd418e9e-72f2-4293-81ff-b902fc60d267":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.j
s":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["bd418e9e-72f2-4293-81ff-b902fc60d267"]},"externalIds":{}} |
@@ -0,1 +1,8 @@ | ||
# [Enabling loading of BM25Vectorizer model](https://github.com/winkjs/wink-nlp/releases/tag/1.10.0) | ||
## Version 1.10.0 November 18, 2021 | ||
### ✨ Features | ||
- Easily load BM25Vectorizer's model using newly introduced `.loadModel()` api. 🎉 | ||
# [Enhancing Typescript support](https://github.com/winkjs/wink-nlp/releases/tag/1.9.0) | ||
@@ -2,0 +9,0 @@ ## Version 1.9.0 November 06, 2021 |
{ | ||
"name": "wink-nlp", | ||
"version": "1.9.0", | ||
"version": "1.10.0", | ||
"description": "Developer friendly Natural Language Processing ✨", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
@@ -121,2 +121,4 @@ # winkNLP | ||
The [winkNLP](https://winkjs.org/wink-nlp/) delivers similar performance on browsers; its performance on a specific machine/browser combination can be measured using the Observable notebook — [How to measure winkNLP's speed on browsers?](https://observablehq.com/@winkjs/how-to-measure-winknlps-speed-on-browsers?collection=@winkjs/winknlp-recipes). | ||
It pos tags a subset of WSJ corpus with an accuracy of **~94.7%** — this includes *tokenization of raw text prior to pos tagging*. The current state-of-the-art is at ~97% accuracy but at lower speeds and is generally computed using gold standard pre-tokenized corpus. | ||
@@ -123,0 +125,0 @@ |
@@ -208,6 +208,13 @@ // wink-nlp | ||
its.modelJSON = function ( tf, idf ) { | ||
return JSON.stringify( { tf: tf, idf: idf } ); | ||
// ## modelJSON | ||
// Serializes the BM25 vectorizer's state into a JSON string. The output | ||
// object carries the five values received as arguments — `tf`, `idf`, | ||
// `terms`, `docId` and `sumOfAllDLs` — plus a fixed `uid` string that tags | ||
// the payload as a 'WinkNLP-BM25Vectorizer-Model/1.0.0' model. | ||
// Returns the string produced by `JSON.stringify()`. | ||
its.modelJSON = function ( tf, idf, terms, docId, sumOfAllDLs ) { | ||
return JSON.stringify( { | ||
uid: 'WinkNLP-BM25Vectorizer-Model/1.0.0', | ||
tf: tf, | ||
idf: idf, | ||
terms: terms, | ||
docId: docId, | ||
sumOfAllDLs: sumOfAllDLs | ||
} ); | ||
}; // modelJSON() | ||
module.exports = its; |
@@ -309,3 +309,3 @@ // Minimum TypeScript Version: 4.0 | ||
export type Norm = "l2" | "NONE"; | ||
export type Norm = "l1" | "l2" | "none"; | ||
@@ -321,6 +321,7 @@ export interface BM25VectorizerConfig { | ||
learn(tokens: Tokens): void; | ||
out<T>(f: ItsFunction<T>): T; | ||
doc(n: number): Document; | ||
out<T>(f: ItsFunction<T>): T; | ||
vectorOf(tokens: Tokens): number[]; | ||
config(): BM25VectorizerConfig; | ||
loadModel(json: string): void; | ||
} | ||
@@ -327,0 +328,0 @@ |
@@ -98,3 +98,3 @@ // wink-nlp | ||
// Setup precision. | ||
const precision = getValidCfgNum( cfg.precision, 9, 1, 18 ); | ||
const precision = getValidCfgNum( cfg.precision, 6, 1, 12 ); | ||
// Setup norm. | ||
@@ -138,2 +138,3 @@ const norm = ( | ||
if ( weightsComputed ) return; | ||
if ( docId === 0 ) throw Error( 'wink-nlp: this operation doesn\'t make sense without any learning; use learn() API first.' ); | ||
// Set the average document length used for normalization. | ||
@@ -223,4 +224,7 @@ const avgDL = sumOfAllDLs / docId; | ||
computeWeights(); | ||
if ( allowed.its4BM25.has( f ) ) return f( tf, idf, terms ); | ||
return its.docBOWArray( tf, idf, terms ); | ||
// Pass `docId` & `sumOfAllDLs` in addition to `tf`, `idf` & `terms`; this | ||
// is needed while saving the model JSON. | ||
if ( allowed.its4BM25.has( f ) ) return f( tf, idf, terms, docId, sumOfAllDLs ); | ||
// In case of invalid `f`, fall back to the default method — `docBOWArray`. | ||
return its.docBOWArray( tf, idf, terms, docId, sumOfAllDLs ); | ||
}; // out() | ||
@@ -274,13 +278,2 @@ | ||
// ## length | ||
/** | ||
* | ||
* Returns the number of unique tokens in the entire corpus. | ||
* | ||
* @return {number} the number of unique tokens in the corpus. | ||
*/ | ||
methods.length = function () { | ||
return Object.keys( idf ).length; | ||
}; // length() | ||
// ## vectorOf | ||
@@ -316,3 +309,3 @@ /** | ||
// `thisNorm || 1` ensures that there is no attempt to divide by zero! | ||
return arr.map( ( v ) => +( v / ( thisNorm || 1 ) ).toFixed( 9 ) ); | ||
return arr.map( ( v ) => +( v / ( thisNorm || 1 ) ).toFixed( precision ) ); | ||
}; // vectorOf() | ||
@@ -322,2 +315,44 @@ | ||
// ## loadModel | ||
/** | ||
* Loads the input model JSON into the BM25's respective data structures. Throws | ||
* an error if called after learning, if the JSON cannot be parsed, or if the | ||
* parsed model does not have the expected format. Sets `weightsComputed` to | ||
* true to prevent further learning. | ||
* | ||
* @param {string} json Input model's JSON string. | ||
* @return {void} Nothing! | ||
* @throws {Error} if `docId > 0`, i.e. loading is attempted after learning. | ||
* @throws {Error} if `json` fails `JSON.parse()`; the parser's error is | ||
* embedded in the message. | ||
* @throws {Error} if the parsed value is not an object with exactly 6 keys, | ||
* if its `uid` is not 'WinkNLP-BM25Vectorizer-Model/1.0.0', or if any of the | ||
* required fields is `undefined`. | ||
*/ | ||
methods.loadModel = function ( json ) { | ||
// Used to check presence of required fields; `uid` is checked separately. | ||
const modelFields = [ 'docId', 'tf', 'idf', 'terms', 'sumOfAllDLs' ]; | ||
let model; | ||
// Loading is only permitted on a vectorizer that has not learned anything. | ||
// Note that a successful load also assigns `docId`, so a later call takes | ||
// this branch as well whenever the loaded `docId` is greater than 0. | ||
if ( docId > 0 ) throw Error( 'wink-nlp: can not load model after learning.' ); | ||
// Parse defensively so that a malformed string surfaces as a wink-nlp error. | ||
try { | ||
model = JSON.parse( json ); | ||
} catch (e) { | ||
throw Error( `wink-nlp: invalid input JSON:\n\t${e}\n\n` ); | ||
} | ||
// Expect exactly 6 keys: the 5 entries of `modelFields` plus `uid`, and the | ||
// `uid` must match the supported model format/version string. | ||
if ( helper.isObject( model ) && ( Object.keys( model ).length === 6 ) && ( model.uid === 'WinkNLP-BM25Vectorizer-Model/1.0.0' ) ) { | ||
// Check presence of all required fields. | ||
modelFields.forEach( ( f ) => { | ||
if ( model[ f ] === undefined ) throw Error( 'wink-nlp: invalid model format/version' ); | ||
} ); | ||
// All good, set fields. | ||
docId = model.docId; | ||
tf = model.tf; | ||
idf = model.idf; | ||
terms = model.terms; | ||
sumOfAllDLs = model.sumOfAllDLs; | ||
// To prevent further learning. | ||
weightsComputed = true; | ||
} else { | ||
throw Error( 'wink-nlp: invalid model format/version' ); | ||
} | ||
}; // loadModel() | ||
return methods; | ||
@@ -324,0 +359,0 @@ }; // bm25Vectorizer() |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
549756
5874
156