natural

Package Overview

Dependencies

Maintainers

Versions

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

natural - npm Package Compare versions

Comparing version 6.10.0 to 6.10.2

lib/natural/sentiment/SentimentAnalyzer.js

		@@ -25,3 +25,3 @@ /*

		const DEBUG = true
		const DEBUG = false

		@@ -28,0 +28,0 @@ // Afinn

lib/natural/tokenizers/aggressive_tokenizer_es.js

		@@ -26,14 +26,10 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^a-zA-Zá-úÁ-ÚñÑüÜ]+/))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^a-zA-Zá-úÁ-ÚñÑüÜ]+/))
		}

lib/natural/tokenizers/aggressive_tokenizer_fa.js

		@@ -27,25 +27,21 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		}
		util.inherits(AggressiveTokenizer, Tokenizer)
		class AggressiveTokenizer extends Tokenizer {
		clearEmptyString (array) {
		return array.filter(function (a) {
		return a !== ''
		})
		}

		module.exports = AggressiveTokenizer
		clearText (text) {
		return text.replace(/.:\+-="'!\?،,؛;/g, ' ')
		}

		AggressiveTokenizer.prototype.clearEmptyString = function (array) {
		return array.filter(function (a) {
		return a !== ''
		})
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		text = this.clearText(text)
		return this.clearEmptyString(text.split(/\s+/))
		}
		}

		AggressiveTokenizer.prototype.clearText = function (text) {
		return text.replace(/.:\+-="'!\?،,؛;/g, ' ')
		}

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		text = this.clearText(text)
		return this.clearEmptyString(text.split(/\s+/))
		}
		module.exports = AggressiveTokenizer

lib/natural/tokenizers/aggressive_tokenizer_fr.js

		@@ -26,14 +26,10 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^a-z0-9äâàéèëêïîöôùüûœç-]+/i))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^a-z0-9äâàéèëêïîöôùüûœç-]+/i))
		}

lib/natural/tokenizers/aggressive_tokenizer_hi.js

		@@ -31,10 +31,3 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		}

		util.inherits(AggressiveTokenizer, Tokenizer)

		/*
		@@ -48,7 +41,9 @@ To know more on hindi

		AggressiveTokenizer.prototype.tokenize = function (text) {
		const response = this.trim(text.replace(/[\u0964\u0965...?,]/g, '').split(/\s+|(?![\u0900-\u097F\u0020-\u007F])./u)).filter(Boolean)
		return response
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		const response = this.trim(text.replace(/[\u0964\u0965...?,]/g, '').split(/\s+|(?![\u0900-\u097F\u0020-\u007F])./u)).filter(Boolean)
		return response
		}
		}

		module.exports = AggressiveTokenizer

lib/natural/tokenizers/aggressive_tokenizer_id.js

		@@ -26,22 +26,18 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		// Remove all non alphanumeric characters except '-'
		// Replace more than one space character to ' '
		normalizeText (text) {
		const result = text.replace(/[^a-z0-9 -]/g, ' ').replace(/( +)/g, ' ')
		return result
		}

		tokenize (text) {
		// break a string up into an array of tokens by space
		text = this.normalizeText(text)
		return this.trim(text.split(' '))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		// Remove all non alphanumeric characters except '-'
		// Replace more than one space character to ' '
		function normalizeText (text) {
		const result = text.replace(/[^a-z0-9 -]/g, ' ').replace(/( +)/g, ' ')
		return result
		}

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by space
		text = normalizeText(text)
		return this.trim(text.split(' '))
		}

lib/natural/tokenizers/aggressive_tokenizer_it.js

		@@ -26,14 +26,10 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/\W+/))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/\W+/))
		}

lib/natural/tokenizers/aggressive_tokenizer_nl.js

		@@ -26,14 +26,10 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^a-zA-Z0-9_'-]+/))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^a-zA-Z0-9_'-]+/))
		}

lib/natural/tokenizers/aggressive_tokenizer_no.js

		@@ -27,16 +27,12 @@ /*
		const normalizer = require('../normalizers/normalizer_no')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		text = normalizer.removeDiacritics(text)

		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^A-Za-z0-9_æøåÆØÅäÄöÖüÜ]+/))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		text = normalizer.removeDiacritics(text)

		// break a string up into an array of tokens by anything non-word
		return this.trim(text.split(/[^A-Za-z0-9_æøåÆØÅäÄöÖüÜ]+/))
		}

lib/natural/tokenizers/aggressive_tokenizer_pl.js

		@@ -26,23 +26,18 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		}
		class AggressiveTokenizer extends Tokenizer {
		withoutEmpty (array) {
		return array.filter(function (a) { return a })
		}

		util.inherits(AggressiveTokenizer, Tokenizer)
		clearText (text) {
		return text.replace(/[^a-zążśźęćńół0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
		}

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.withoutEmpty = function (array) {
		return array.filter(function (a) { return a })
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.clearText(text).split(' '))
		}
		}

		AggressiveTokenizer.prototype.clearText = function (text) {
		return text.replace(/[^a-zążśźęćńół0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
		}

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.clearText(text).split(' '))
		}
		module.exports = AggressiveTokenizer

lib/natural/tokenizers/aggressive_tokenizer_pt.js

		@@ -26,18 +26,14 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		withoutEmpty (array) {
		return array.filter(function (a) { return a })
		}

		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.trim(text.split(/[^a-zA-Zà-úÀ-Ú]/)))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.withoutEmpty = function (array) {
		return array.filter(function (a) { return a })
		}

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.trim(text.split(/[^a-zA-Zà-úÀ-Ú]/)))
		}

lib/natural/tokenizers/aggressive_tokenizer_ru.js

		@@ -26,23 +26,18 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		}
		class AggressiveTokenizer extends Tokenizer {
		withoutEmpty (array) {
		return array.filter(function (a) { return a })
		}

		util.inherits(AggressiveTokenizer, Tokenizer)
		clearText (text) {
		return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
		}

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.withoutEmpty = function (array) {
		return array.filter(function (a) { return a })
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.clearText(text).split(' '))
		}
		}

		AggressiveTokenizer.prototype.clearText = function (text) {
		return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
		}

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.clearText(text).split(' '))
		}
		module.exports = AggressiveTokenizer

lib/natural/tokenizers/aggressive_tokenizer_sv.js

		@@ -27,17 +27,13 @@ /*
		const normalizer = require('../normalizers/normalizer_sv')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		text = normalizer.removeDiacritics(text)

		// break a string up into an array of tokens by anything non-word
		// Ü is not part of swedish alphabet but there are words using it like müsli and München
		return this.trim(text.split(/[^A-Za-z0-9_åÅäÄöÖüÜ-]+/))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		text = normalizer.removeDiacritics(text)

		// break a string up into an array of tokens by anything non-word
		// Ü is not part of swedish alphabet but there are words using it like müsli and München
		return this.trim(text.split(/[^A-Za-z0-9_åÅäÄöÖüÜ-]+/))
		}

lib/natural/tokenizers/aggressive_tokenizer_uk.js

		@@ -0,25 +1,43 @@
		/*
		Copyright (c) 2023, Pluto Rotegott
		Tokenizer for Ukrainian

		Permission is hereby granted, free of charge, to any person obtaining a copy
		of this software and associated documentation files (the "Software"), to deal
		in the Software without restriction, including without limitation the rights
		to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
		copies of the Software, and to permit persons to whom the Software is
		furnished to do so, subject to the following conditions:

		The above copyright notice and this permission notice shall be included in
		all copies or substantial portions of the Software.

		THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
		AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
		OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
		THE SOFTWARE.
		*/

		'use strict'

		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		}
		class AggressiveTokenizer extends Tokenizer {
		withoutEmpty (array) {
		return array.filter(function (a) { return a })
		}

		util.inherits(AggressiveTokenizer, Tokenizer)
		clearText (text) {
		return text.replace(/[^a-zа-яґєії0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
		}

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.withoutEmpty = function (array) {
		return array.filter(function (a) { return a })
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.clearText(text).split(' '))
		}
		}

		AggressiveTokenizer.prototype.clearText = function (text) {
		return text.replace(/[^a-zа-яґєії0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
		}

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		return this.withoutEmpty(this.clearText(text).split(' '))
		}
		module.exports = AggressiveTokenizer

lib/natural/tokenizers/aggressive_tokenizer_vi.js

		@@ -26,15 +26,10 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		// break a string up into an array of tokens by anything non-word
		tokenize (text) {
		return this.trim(text.split(/[^a-z0-9àáảãạăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵđ]+/i))
		}
		}

		util.inherits(AggressiveTokenizer, Tokenizer)

		// break a string up into an array of tokens by anything non-word
		AggressiveTokenizer.prototype.tokenize = function (text) {
		return this.trim(text.split(/[^a-z0-9àáảãạăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵđ]+/i))
		}

		module.exports = AggressiveTokenizer

lib/natural/tokenizers/aggressive_tokenizer.js

		@@ -26,20 +26,16 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const AggressiveTokenizer = function () {
		Tokenizer.call(this)
		class AggressiveTokenizer extends Tokenizer {
		tokenize (text) {
		// break a string up into an array of tokens by anything non-word
		// underscore is considered to be non word character
		// Old splitter:
		// return this.trim(text.split(/[\W|_]+/))
		// New splitter:
		// Explicitly mentions which characters make up words.
		// So words may contain hyphen -, single quote ' and slash /
		return this.trim(text.split(/[^a-zA-Z0-9'\-/]+/))
		}
		}
		util.inherits(AggressiveTokenizer, Tokenizer)

		module.exports = AggressiveTokenizer

		AggressiveTokenizer.prototype.tokenize = function (text) {
		// break a string up into an array of tokens by anything non-word
		// underscore is considered to be non word character
		// Old splitter:
		// return this.trim(text.split(/[\W|_]+/))
		// New splitter:
		// Explicitly mentions which characters make up words.
		// So words may contain hyphen -, single quote ' and slash /
		return this.trim(text.split(/[^a-zA-Z0-9'\-/]+/))
		}

lib/natural/tokenizers/regexp_tokenizer.js

		@@ -25,30 +25,31 @@ /*

		const DEBUG = false

		const Tokenizer = require('./tokenizer')
		const util = require('util')
		const _ = require('underscore')

		// Base Class for RegExp Matching
		const RegexpTokenizer = function (opts) {
		const options = opts || {}
		this._pattern = options.pattern || this._pattern
		this.discardEmpty = options.discardEmpty || true
		class RegexpTokenizer extends Tokenizer {
		constructor (opts) {
		super(opts)
		const options = opts || {}
		this._pattern = options.pattern || this._pattern
		this.discardEmpty = options.discardEmpty || true

		// Match and split on GAPS not the actual WORDS
		this._gaps = options.gaps
		// Match and split on GAPS not the actual WORDS
		this._gaps = options.gaps

		if (this._gaps === undefined) {
		this._gaps = true
		if (this._gaps === undefined) {
		this._gaps = true
		}
		}
		}

		util.inherits(RegexpTokenizer, Tokenizer)
		tokenize (s) {
		let results

		RegexpTokenizer.prototype.tokenize = function (s) {
		let results

		if (this._gaps) {
		results = s.split(this._pattern)
		return (this.discardEmpty) ? _.without(results, '', ' ') : results
		} else {
		return s.match(this._pattern)
		if (this._gaps) {
		results = s.split(this._pattern)
		return (this.discardEmpty) ? _.without(results, '', ' ') : results
		} else {
		return s.match(this._pattern)
		}
		}
		@@ -61,18 +62,21 @@ }

		/***
		* A tokenizer that accepts an alphabet definition.
		* @param {string} options.language ISO 639-1 for the language, e.g. 'en'
		*/
		const OrthographyTokenizer = function (options) {
		const pattern = orthographyMatchers[options.language]
		if (!pattern) {
		WordTokenizer.call(this, options)
		} else {
		this._pattern = pattern
		RegexpTokenizer.call(this, options)
		class OrthographyTokenizer {
		constructor (options) {
		const pattern = orthographyMatchers[options.language]
		DEBUG && console.log(pattern)

		if (!pattern) {
		this.tokenizer = new WordTokenizer()
		} else {
		this.tokenizer = new RegexpTokenizer(options)
		this.tokenizer._pattern = pattern
		DEBUG && console.log(this.tokenizer)
		}
		}

		tokenize (text) {
		return this.tokenizer.tokenize(text)
		}
		}

		util.inherits(OrthographyTokenizer, RegexpTokenizer)

		exports.OrthographyTokenizer = OrthographyTokenizer
		@@ -88,8 +92,9 @@
		*/
		const WordTokenizer = function (options) {
		this._pattern = /[^A-Za-zА-Яа-я0-9_]+/
		RegexpTokenizer.call(this, options)
		class WordTokenizer extends RegexpTokenizer {
		constructor (options) {
		super(options)
		this._pattern = /[^A-Za-zА-Яа-я0-9_]+/
		}
		}

		util.inherits(WordTokenizer, RegexpTokenizer)
		exports.WordTokenizer = WordTokenizer
		@@ -105,8 +110,12 @@
		*/
		const WordPunctTokenizer = function (options) {
		this._pattern = /([A-Za-zÀ-ÿ-]+|[0-9._]+|.|!|\?|'|"|:|;|,|-)/i
		RegexpTokenizer.call(this, options)
		class WordPunctTokenizer extends RegexpTokenizer {
		constructor (options) {
		if (!options) {
		options = {}
		}
		options.pattern = /([A-Za-zÀ-ÿ-]+|[0-9._]+|.|!|\?|'|"|:|;|,|-)/i
		super(options)
		}
		}

		util.inherits(WordPunctTokenizer, RegexpTokenizer)
		exports.WordPunctTokenizer = WordPunctTokenizer

lib/natural/tokenizers/sentence_tokenizer_parser.js

		@@ -25,14 +25,9 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const SentenceTokenizer = function () {
		Tokenizer.call(this)
		class SentenceTokenizer extends Tokenizer {
		tokenize (text) {
		return (parser.parse(text))
		}
		}

		util.inherits(SentenceTokenizer, Tokenizer)

		SentenceTokenizer.prototype.tokenize = function (text) {
		return (parser.parse(text))
		}

		module.exports = SentenceTokenizer

lib/natural/tokenizers/sentence_tokenizer.js

		@@ -26,30 +26,27 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')

		const DEBUG = false

		const SentenceTokenizer = function () {
		Tokenizer.call(this)
		}
		util.inherits(SentenceTokenizer, Tokenizer)
		class SentenceTokenizer extends Tokenizer {
		tokenize (text) {
		// Break string up in to sentences based on punctation and quotation marks
		// See https://gist.github.com/Hugo-ter-Doest/4ed21fb7eb5077814d998fa61a726566
		// for a breakdown of the regular expression
		let tokens = text.match(/(?<=\s+|^)["'‘“'"[({⟨]?(.?[.?!…]|.+)(\s[.?!…])["'’”'"\])}⟩]?(?=\s+|$)/g)

		SentenceTokenizer.prototype.tokenize = function (text) {
		// break string up in to sentences based on punctation and quotation marks
		// let tokens = text.match(/(?<=\s+|^)["'‘“'"[({⟨]?.?[.?!…](\s[.?!…])["'’”'"\])}⟩]?(?=\s+|$)/g)
		let tokens = text.match(/(?<=\s+|^)["'‘“'"[({⟨]?(.?[.?!…]|.+)(\s[.?!…])["'’”'"\])}⟩]?(?=\s+|$)/g)
		DEBUG && console.log('SentenceTokenizer.tokenize: ' + tokens)

		DEBUG && console.log('SentenceTokenizer.tokenize: ' + tokens)
		if (!tokens) {
		return [text]
		}

		if (!tokens) {
		return [text]
		}
		// remove unecessary white space
		tokens = tokens.map(Function.prototype.call, String.prototype.trim)

		// remove unecessary white space
		tokens = tokens.map(Function.prototype.call, String.prototype.trim)
		DEBUG && console.log('SentenceTokenizer.tokenize: tokens after removing whitespace ' + tokens)

		DEBUG && console.log('SentenceTokenizer.tokenize: tokens after removing whitespace ' + tokens)

		return this.trim(tokens)
		return this.trim(tokens)
		}
		}

		module.exports = SentenceTokenizer

lib/natural/tokenizers/tokenizer_case.js

		@@ -24,39 +24,24 @@ /*
		const Tokenizer = require('./tokenizer')
		const util = require('util')
		const CaseTokenizer = function () {
		Tokenizer.call(this)
		}

		util.inherits(CaseTokenizer, Tokenizer)

		// Changing the prototype of a native type is bad practice
		/*
		CaseTokenizer.prototype.attach = function () {
		const self = this

		String.prototype.tokenize = function (preserveApostrophe) {
		return self.tokenize(this, preserveApostrophe)
		}
		}
		*/

		// Idea from Seagull: http://stackoverflow.com/a/26482650
		CaseTokenizer.prototype.tokenize = function (text, preserveApostrophe) {
		const whitelist = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
		const lower = text.toLowerCase()
		const upper = text.toUpperCase()
		let result = ''
		let i
		class CaseTokenizer extends Tokenizer {
		tokenize (text, preserveApostrophe) {
		const whitelist = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
		const lower = text.toLowerCase()
		const upper = text.toUpperCase()
		let result = ''
		let i

		for (i = 0; i < lower.length; ++i) {
		if (lower[i] !== upper[i] || whitelist.indexOf(lower[i]) > -1 || (text[i] === '\'' && preserveApostrophe)) {
		result += text[i]
		} else {
		result += ' '
		for (i = 0; i < lower.length; ++i) {
		if (lower[i] !== upper[i] || whitelist.indexOf(lower[i]) > -1 || (text[i] === '\'' && preserveApostrophe)) {
		result += text[i]
		} else {
		result += ' '
		}
		}

		return this.trim(result.replace(/\s+/g, ' ').split(' '))
		}

		return this.trim(result.replace(/\s+/g, ' ').split(' '))
		}

		module.exports = CaseTokenizer

lib/natural/tokenizers/tokenizer.js

		@@ -23,33 +23,14 @@ /*

		/**
		* @todo Use .bind() in Tokenizer.prototype.attach().
		*/

		'use strict'

		const Tokenizer = function () {
		}
		class Tokenizer {
		trim (array) {
		while (array[array.length - 1] === '') { array.pop() }

		Tokenizer.prototype.trim = function (array) {
		while (array[array.length - 1] === '') { array.pop() }
		while (array[0] === '') { array.shift() }

		while (array[0] === '') { array.shift() }

		return array
		}

		// Expose an attach function that will patch String with new methods.
		// Changing the prototype of a native type is bad practice
		/*
		Tokenizer.prototype.attach = function () {
		const self = this

		String.prototype.tokenize = function () {
		return self.tokenize(this)
		return array
		}
		}
		*/

		Tokenizer.prototype.tokenize = function () {}

		module.exports = Tokenizer

package.json

		{
		"name": "natural",
		"description": "General natural language (tokenizing, stemming (English, Russian, Spanish), part-of-speech tagging, sentiment analysis, classification, inflection, phonetics, tfidf, WordNet, jaro-winkler, Levenshtein distance, Dice's Coefficient) facilities for node.",
		"version": "6.10.0",
		"version": "6.10.2",
		"homepage": "https://github.com/NaturalNode/natural",
		@@ -36,3 +36,3 @@ "repository": {
		"sinon": "^1.12.2",
		"standard": "^16.0.3",
		"standard": "^16.0.4",
		"ts-standard": "^12.0.2",
		@@ -39,0 +39,0 @@ "typescript": "^4.9.3",

natural - npm Package Compare versions

New alerts

Fixed alerts

Worsened metrics