Socket
Socket
Sign inDemoInstall

wink-tokenizer

Package Overview
Dependencies
0
Maintainers
3
Versions
19
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 4.0.0 to 4.1.0

CODE_OF_CONDUCT.md

6

package.json
{
"name": "wink-tokenizer",
"version": "4.0.0",
"version": "4.1.0",
"description": "Multilingual tokenizer that automatically tags each token with its type",

@@ -47,4 +47,4 @@ "keywords": [

"docker": "^1.0.0",
"documentation": "^6.1.0",
"eslint": "^4.19.1",
"documentation": "^8.1.2",
"eslint": "^5.4.0",
"istanbul": "^0.4.5",

@@ -51,0 +51,0 @@ "jshint": "^2.9.5",

@@ -5,3 +5,3 @@ # wink-tokenizer

### [![Build Status](https://api.travis-ci.org/winkjs/wink-tokenizer.svg?branch=master)](https://travis-ci.org/winkjs/wink-tokenizer) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-tokenizer/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-tokenizer?branch=master) [![Inline docs](http://inch-ci.org/github/winkjs/wink-tokenizer.svg?branch=master)](http://inch-ci.org/github/winkjs/wink-tokenizer) [![devDependencies Status](https://david-dm.org/winkjs/wink-tokenizer/dev-status.svg)](https://david-dm.org/winkjs/wink-tokenizer?type=dev)
### [![Build Status](https://api.travis-ci.org/winkjs/wink-tokenizer.svg?branch=master)](https://travis-ci.org/winkjs/wink-tokenizer) [![Coverage Status](https://coveralls.io/repos/github/winkjs/wink-tokenizer/badge.svg?branch=master)](https://coveralls.io/github/winkjs/wink-tokenizer?branch=master) [![Inline docs](http://inch-ci.org/github/winkjs/wink-tokenizer.svg?branch=master)](http://inch-ci.org/github/winkjs/wink-tokenizer) [![devDependencies Status](https://david-dm.org/winkjs/wink-tokenizer/dev-status.svg)](https://david-dm.org/winkjs/wink-tokenizer?type=dev) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/winkjs/Lobby)

@@ -18,4 +18,5 @@ [<img align="right" src="https://decisively.github.io/wink-logos/logo-title.png" width="100px" >](http://winkjs.org/)

1. Automatic detection & tagging of token's feature;
1. Automatic detection & tagging of different types of tokens based on their features:
- These include word, punctuation, email, mention, hashtag, emoticon, emoji, etc.
- User-definable token types.

@@ -22,0 +23,0 @@

@@ -85,3 +85,5 @@ // wink-tokenizer

];
// Used to generate finger print from the tokens.
// NOTE: this variable is being reset in `defineConfig()`.
var fingerPrintCodes = {

@@ -266,3 +268,4 @@ emoticon: 'c',

* for that type of text; whereas false value will mean that the tokenization of that
* type of text will not be attempted.
* type of text will not be attempted. It also **resets** the effect of any previous
* call(s) to the [`addRegex()`](#addregex) API.
*

@@ -316,2 +319,19 @@ * *An empty config object is equivalent to splitting on spaces. Whatever tokens

} );
// Reset the `fingerPrintCodes` variable.
fingerPrintCodes = {
emoticon: 'c',
email: 'e',
emoji: 'j',
hashtag: 'h',
mention: 'm',
number: 'n',
ordinal: 'o',
quoted_phrase: 'q', // eslint-disable-line camelcase
currency: 'r',
// symbol: 's',
time: 't',
url: 'u',
word: 'w',
alien: 'z'
};
return ( ( Object.keys( uniqueCats ) ).length );

@@ -378,5 +398,69 @@ }; // defineConfig()

// ### addTag
/**
 * Registers a new tag along with its finger print code in `fingerPrintCodes`.
 *
 * @param {string} name — name of the tag being added.
 * @param {string} fingerprintCode — finger print code stored against `name`.
 * @return {void} nothing!
 * @throws {Error} if `name` is already registered with a finger print code.
 */
var addTag = function (name, fingerprintCode) {
  // Look at own properties only: names such as `constructor` or `toString` are
  // inherited by every plain object and must not be mistaken for existing tags.
  if (Object.prototype.hasOwnProperty.call( fingerPrintCodes, name ) && fingerPrintCodes[name]) {
    throw new Error( 'Tag ' + name + ' already exists' );
  }
  fingerPrintCodes[name] = fingerprintCode;
}; // addTag()
// ### addRegex
/**
 * Adds a regex for parsing a new type of token. This regex can either be mapped
 * to an existing tag or it allows creation of a new tag along with its finger print.
 * The uniqueness of the [finger prints](#defineconfig) has to be ensured by the user.
 *
 * *The added regex(s) will supersede the internal parsing.*
 *
 * @param {RegExp} regex — the new regular expression.
 * @param {string} tag — tokens matching the `regex` will be assigned this tag.
 * @param {string} [fingerprintCode=undefined] — required if adding a new
 * tag; ignored if using an existing tag.
 * @return {void} nothing!
 * @throws {Error} if `tag` is not an existing tag and no `fingerprintCode`
 * is provided.
 * @example
 * // Adding a regex for an existing tag
 * myTokenizer.addRegex( /\(oo\)/gi, 'emoticon' );
 * myTokenizer.tokenize( '(oo) Hi!' )
 * // -> [ { value: '(oo)', tag: 'emoticon' },
 * //      { value: 'Hi', tag: 'word' },
 * //      { value: '!', tag: 'punctuation' } ]
 *
 * // Adding a regex to parse a new token type
 * myTokenizer.addRegex( /hello/gi, 'greeting', 'g' );
 * myTokenizer.tokenize( 'hello, how are you?' );
 * // -> [ { value: 'hello', tag: 'greeting' },
 * //      { value: ',', tag: 'punctuation' },
 * //      { value: 'how', tag: 'word' },
 * //      { value: 'are', tag: 'word' },
 * //      { value: 'you', tag: 'word' },
 * //      { value: '?', tag: 'punctuation' } ]
 * // Notice how "hello" is now tagged as "greeting" and not as "word".
 *
 * // Using defineConfig will reset the above!
 * myTokenizer.defineConfig( { word: true } );
 * myTokenizer.tokenize( 'hello, how are you?' );
 * // -> [ { value: 'hello', tag: 'word' },
 * //      { value: ',', tag: 'punctuation' },
 * //      { value: 'how', tag: 'word' },
 * //      { value: 'are', tag: 'word' },
 * //      { value: 'you', tag: 'word' },
 * //      { value: '?', tag: 'punctuation' } ]
 */
var addRegex = function (regex, tag, fingerprintCode) {
  // A tag is known only when it is an own entry of `fingerPrintCodes` with a
  // finger print code; inherited names like `constructor` do not count.
  var isKnownTag = Object.prototype.hasOwnProperty.call( fingerPrintCodes, tag ) &&
    Boolean( fingerPrintCodes[tag] );
  if (!isKnownTag) {
    if (!fingerprintCode) {
      throw new Error( 'Tag ' + tag + ' doesn\'t exist; Provide a \'fingerprintCode\' to add it as a tag.' );
    }
    addTag(tag, fingerprintCode);
  }
  // The newest regex is placed at the front of `rgxs`.
  rgxs.unshift( { regex: regex, category: tag } );
}; // addRegex()
methods.defineConfig = defineConfig;
methods.tokenize = tokenize;
methods.getTokensFP = getTokensFP;
methods.addTag = addTag;
methods.addRegex = addRegex;
return methods;

@@ -383,0 +467,0 @@ };

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc