wink-nlp
Advanced tools
Comparing version 1.13.1 to 1.14.0
@@ -1,1 +0,1 @@ | ||
{"processes":{"447606eb-68d4-4bdc-894f-5ad8a7af4cf5":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/as.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink
-nlp/src/contained-markings.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/identify-marked-area.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js
":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["447606eb-68d4-4bdc-894f-5ad8a7af4cf5"]},"externalIds":{}} | ||
{"processes":{"8906aaf5-b61e-498b-9a09-90d1440090a1":{"parent":null,"children":[]}},"files":{"/Users/neilsbohr/dev/winkjs/wink-nlp/src/wink-nlp.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/dd-wrapper.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/constants.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/doc-v2.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-entities.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/locate.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/get-parent-item.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/search.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-get-item.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-get-item.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-each.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-each.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-filter.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-filter.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-token-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/its.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sort4FT.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/sentence-wise-importance.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/allowed.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/nei
lsbohr/dev/winkjs/wink-nlp/src/as.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/contained-markings.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-tokens-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-tokens-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-entity-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-entities-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/sel-entities-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-sentence-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/col-sentences-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/itm-document-out.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/api/print-tokens.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/cache.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokenizer.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/recursive-tokenizer.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compile-trex.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/tokens-mappers.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/examples-compiler.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/automaton.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/compose-patterns.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/ident
ify-marked-area.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/src/helper.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/bm25-vectorizer.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/allowed.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"],"/Users/neilsbohr/dev/winkjs/wink-nlp/utilities/similarity.js":["8906aaf5-b61e-498b-9a09-90d1440090a1"]},"externalIds":{}} |
@@ -0,1 +1,7 @@ | ||
# [Introducing helper for extracting important sentences from a document](https://github.com/winkjs/wink-nlp/releases/tag/1.14.0) | ||
## Version 1.14.0 May 20, 2023 | ||
### ✨ Features | ||
- You can now use the `its.sentenceWiseImportance` helper to obtain sentence-wise importance (on a scale of 0 to 1) of a document, if it is supported by the language model. 📚📊🤓 | ||
# [Operational update](https://github.com/winkjs/wink-nlp/releases/tag/1.13.1) | ||
@@ -2,0 +8,0 @@ ## Version 1.13.1 March 27, 2023 |
{ | ||
"name": "wink-nlp", | ||
"version": "1.13.1", | ||
"version": "1.14.0", | ||
"description": "Developer friendly Natural Language Processing ✨", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
@@ -10,5 +10,5 @@ # winkNLP | ||
It is built ground up with a lean code base that has [no external dependency](https://snyk.io/test/github/winkjs/wink-nlp?tab=dependencies). A test coverage of [~100%](https://coveralls.io/github/winkjs/wink-nlp?branch=master) and compliance with the [Open Source Security Foundation best practices](https://bestpractices.coreinfrastructure.org/en/projects/6035) make winkNLP the ideal tool for building production grade systems with confidence. | ||
It is built ground up with [no external dependency](https://snyk.io/test/github/winkjs/wink-nlp?tab=dependencies) and has a [lean code base of ~10Kb minified & gzipped](https://bundlephobia.com/package/wink-nlp). A test coverage of [~100%](https://coveralls.io/github/winkjs/wink-nlp?branch=master) and compliance with the [Open Source Security Foundation best practices](https://bestpractices.coreinfrastructure.org/en/projects/6035) make winkNLP the ideal tool for building production grade systems with confidence. | ||
WinkNLP with full [Typescript support](https://github.com/winkjs/wink-nlp/blob/master/types/index.d.ts), runs on Node.js and browsers. | ||
WinkNLP, with full [TypeScript support](https://github.com/winkjs/wink-nlp/blob/master/types/index.d.ts), runs on Node.js and [web browsers](https://github.com/winkjs/wink-nlp#how-to-install-for-web-browser). | ||
@@ -38,3 +38,3 @@ ## Build amazing apps quickly | ||
<tr><td>♻️ Extensive text processing features</td><td>Remove and/or retain tokens with specific attributes such as part-of-speech, named entity type, token type, stop word, shape and many more; compute Flesch reading ease score; generate n-grams; normalize, lemmatise or stem. Check out how with the right kind of text preprocessing, even <a href="https://github.com/winkjs/wink-naive-bayes-text-classifier#readme">Naive Bayes classifier</a> achieves <b>impressive (≥90%)</b> accuracy in sentiment analysis and chatbot intent classification tasks.</td></tr> | ||
<tr><td>🔠 Pre-trained <a href="https://winkjs.org/wink-nlp/language-models.html">language models</a></td><td>Compact sizes starting from <b><3MB</b> – reduced model loading time drastically.</td></tr> | ||
<tr><td>🔠 Pre-trained <a href="https://winkjs.org/wink-nlp/language-models.html">language models</a></td><td>Compact sizes starting from <a href="https://bundlephobia.com/package/wink-eng-lite-web-model">~1MB (minified & gzipped)</a> – reduce model loading time drastically down to ~1 second on a 4G network.</td></tr> | ||
<tr><td>💼 Host of <a href="https://winkjs.org/wink-nlp/its-as-helper.html">utilities & tools</a></td><td>BM25 vectorizer; Several similarity methods – Cosine, Tversky, Sørensen-Dice, Otsuka-Ochiai; Helpers to get bag of words, frequency table, lemma/stem, stop word removal and many more.</td></tr> | ||
@@ -41,0 +41,0 @@ </table> |
@@ -6,5 +6,5 @@ # Roadmap 🧭 | ||
|---|---|---|---| | ||
|01.|**Extractive Summarization**:<br/> Add `its.summary` helper to produce extractive summary of text via `doc.out( its.summary )`. While it should be language agnostic, but it should leverage loaded language model's capability to improve summarization.| Simple | WIP | | ||
|01.|**Extractive Summarization**:<br/> Add `its.summary` helper to produce extractive summary of text via `doc.out( its.summary )`. While it should be language agnostic, it should leverage the loaded language model's capability to improve summarization.| Simple | [WIP](https://observablehq.com/@winkjs/how-to-visualize-key-sentences-in-a-document) | | ||
|02.|**Text Pre-processor**:<br/>Add a text preprocessing utility that provides options to (a) filter specific tokens based on their properties such as `pos`, `isStopWordFlag`, and `type`; (b) map entity type with a definable keyword; (c) add bigrams & trigrams and (d) inject sentiment. The API should follow winkNLP style and standards.|Medium|YTS| | ||
|03.|**Word Vectors Integration**:<br/>Add integration with various word vectors starting with GloVe. This should include compression/decompression for fast loading, helpers for token, sentence and document vector computation. |High|YTS| | ||
|03.|**Word Vectors Integration**:<br/>Add integration with various word vectors starting with GloVe. This should include compression/decompression for fast loading, helpers for token, sentence and document vector computation. |High|WIP| | ||
|04.|**Sub-word Tokenizer**:<br/>Add sub-word tokenization feature using techniques like Byte Pair Encoding (BPE) and/or WordPiece. The processing pipeline should allow choice of tokenizer.|Very High|YTS| | ||
@@ -11,0 +11,0 @@ |05.|**Compose Corpus**:<br/>Add a utility to produce training corpus using patterns and cartesian product.|Simple|YTS| |
@@ -118,3 +118,4 @@ // wink-nlp | ||
its.stem, | ||
its.readabilityStats | ||
its.readabilityStats, | ||
its.sentenceWiseImportance | ||
] ); | ||
@@ -121,0 +122,0 @@ |
@@ -70,2 +70,6 @@ // wink-nlp | ||
if ( itsfn === its.sentenceWiseImportance ) { | ||
return itsfn( rdd ); | ||
} | ||
// Setup the correct `as.fn` because the current markedup text would have | ||
@@ -72,0 +76,0 @@ // returned the `value`. Refer to `its.markedUpText`. |
@@ -36,2 +36,3 @@ // wink-nlp | ||
var caseMap = [ 'other', 'lowerCase', 'upperCase', 'titleCase' ]; | ||
var swi = require( './sentence-wise-importance.js' ); | ||
@@ -166,2 +167,6 @@ // Size of a single token. | ||
its.sentenceWiseImportance = function ( rdd ) { | ||
return swi( rdd ); | ||
}; // sentenceWiseImportance() | ||
/* ------ utilities ------ */ | ||
@@ -168,0 +173,0 @@ |
@@ -0,1 +1,33 @@ | ||
// wink-nlp | ||
// | ||
// Copyright (C) GRAYPE Systems Private Limited | ||
// | ||
// This file is part of “wink-nlp”. | ||
// | ||
// Permission is hereby granted, free of charge, to any | ||
// person obtaining a copy of this software and | ||
// associated documentation files (the "Software"), to | ||
// deal in the Software without restriction, including | ||
// without limitation the rights to use, copy, modify, | ||
// merge, publish, distribute, sublicense, and/or sell | ||
// copies of the Software, and to permit persons to | ||
// whom the Software is furnished to do so, subject to | ||
// the following conditions: | ||
// | ||
// The above copyright notice and this permission notice | ||
// shall be included in all copies or substantial | ||
// portions of the Software. | ||
// | ||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF | ||
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED | ||
// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | ||
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF | ||
// CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
// DEALINGS IN THE SOFTWARE. | ||
// | ||
/** | ||
@@ -2,0 +34,0 @@ * Stable sort function for frequency table i.e. `[ [ term, frequency ] ... ]`. |
@@ -94,2 +94,7 @@ // Minimum TypeScript Version: 4.0 | ||
export interface SentenceImportance { | ||
index: number; | ||
importance: number; | ||
} | ||
export type ModelTermFrequencies = Bow; | ||
@@ -125,2 +130,3 @@ export type ModelInverseDocumentFrequencies = Bow; | ||
span(spanItem: number[]): number[]; | ||
sentenceWiseImportance(rdd: RawDocumentData): SentenceImportance[]; | ||
sentiment(spanItem: number[]): number; | ||
@@ -127,0 +133,0 @@ readabilityStats(rdd: RawDocumentData, addons: ModelAddons): ReadabilityStats; |
602787
62
6234