prospectimo - npm Package Compare versions

prospectimo

Package Overview

Dependencies

Maintainers

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.2.0 to 0.3.0

204

index.js

		/**
		* prospectimo
		* v0.2.0
		* v0.3.0
		*
		@@ -24,7 +24,8 @@ * Analyse the temporal orientation of a string.
		* 'encoding': 'binary', // 'binary' (default), or 'frequency' - type of word encoding to use.
		* 'threshold': -0.38, // minimum lexical weight; only lexicon words with a weight above this are matched
		* 'bigrams': true, // compare against bigrams in the lexicon?
		* 'trigrams': true, // compare against trigrams in the lexicon?
		* }
		* const text = "A big long string of text...";
		* const orientation = prospectimo(text, opts);
		* const str = "A big long string of text...";
		* const orientation = prospectimo(str, opts);
		* console.log(orientation)
		@@ -34,3 +35,3 @@ *
		* @param {Object} opts options
		* @return {string\|number} temporal orientation or lexical value based on opts
		* @return {Object\|string} temporal orientation or lexical value based on opts
		*/
		@@ -43,16 +44,15 @@

		let tokenizer = root.tokenizer
		let lexicon = root.lexicon
		let natural = root.natural
		let tokenizer = root.tokenizer

		if (typeof tokenizer === 'undefined') {
		const hasRequire = typeof require !== 'undefined'
		if (hasRequire) {
		tokenizer = require('happynodetokenizer')
		if (typeof lexicon === 'undefined') {
		if (typeof require !== 'undefined') {
		lexicon = require('./data/lexicon.json')
		natural = require('natural')
		} else throw new Error('prospectimo required happynodetokenizer and ./data/lexicon.json')
		tokenizer = require('happynodetokenizer')
		} else throw new Error('prospectimo requires node modules happynodetokenizer and natural, and ./data/lexicon.json')
		}

		// get multiple indexes helper
		// Find how many times an element appears in an array
		Array.prototype.indexesOf = function (el) {
		@@ -70,14 +70,18 @@ const idxs = []
		/**
		* @function getBigrams
		* @param {string} str input string
		* @return {Array} array of bigram strings
		* Get all the n-grams of a string and return as an array
		* @function getNGrams
		* @param {string} str input string
		* @param {number} n arbitrary n-gram number, e.g. 2 = bigrams
		* @return {Array} array of ngram strings
		*/
		const getBigrams = str => {
		const NGrams = natural.NGrams
		const bigrams = NGrams.bigrams(str)
		const getNGrams = (str, n) => {
		// default to bi-grams on null n
		if (n == null) n = 2
		if (typeof n !== 'number') n = Number(n)
		const ngrams = natural.NGrams.ngrams(str, n)
		const len = ngrams.length
		const result = []
		const len = bigrams.length
		let i = 0
		for (i; i < len; i++) {
		result.push(bigrams[i].join(' '))
		result.push(ngrams[i].join(' '))
		}
		@@ -88,19 +92,14 @@ return result
		/**
		* @function getTrigrams
		* @param {string} str input string
		* @return {Array} array of trigram strings
		* Loop through lexicon and match against array
		* @function getMatches
		* @param {Array} arr token array
		* @param {number} threshold min. weight threshold
		* @return {Object} object of matches
		*/
		const getTrigrams = str => {
		const NGrams = natural.NGrams
		const trigrams = NGrams.trigrams(str)
		const result = []
		const len = trigrams.length
		let i = 0
		for (i; i < len; i++) {
		result.push(trigrams[i].join(' '))
		}
		return result
		}

		const getMatches = (arr) => {
		const getMatches = (arr, threshold) => {
		// error prevention
		if (arr == null) return null
		if (threshold == null) threshold = -999
		if (typeof threshold !== 'number') threshold = Number(threshold)
		// loop through categories in lexicon
		const matches = {}
		@@ -111,23 +110,15 @@ let category
		let match = []
		let key
		let word
		let data = lexicon[category]
		for (key in data) {
		if (!data.hasOwnProperty(key)) continue
		if (arr.indexOf(key) > -1) {
		let item
		let weight = data[key]
		let reps = arr.indexesOf(key).length
		if (reps > 1) {
		let words = []
		for (let i = 0; i < reps; i++) {
		words.push(key)
		}
		item = [words, weight]
		} else {
		item = [key, weight]
		}
		match.push(item)
		// loop through words in category
		for (word in data) {
		if (!data.hasOwnProperty(word)) continue
		let weight = data[word]
		// if word from input matches word from lexicon ...
		if (arr.indexOf(word) > -1 && weight > threshold) {
		let count = arr.indexesOf(word).length // number of times the word appears in the input text
		match.push([word, count, weight])
		}
		matches[category] = match
		}
		matches[category] = match
		}
		@@ -137,34 +128,35 @@ return matches

		/**
		* Calculate the total lexical value of matches
		* @function calcLex
		* @param {Object} obj matches object
		* @param {number} wc wordcount
		* @param {number} int intercept value
		* @param {string} enc encoding
		* @return {number} lexical value
		*/
		const calcLex = (obj, wc, int, enc) => {
		const counts = []
		const weights = []
		let key
		for (key in obj) {
		if (!obj.hasOwnProperty(key)) continue
		if (Array.isArray(obj[key][0])) {
		counts.push(obj[key][0].length)
		} else {
		counts.push(1)
		}
		weights.push(obj[key][1])
		}
		if (obj == null) return null
		let lex = 0
		let i
		const len = counts.length
		const words = Number(wc)
		for (i = 0; i < len; i++) {
		let weight = Number(weights[i])
		if (enc === 'frequency') {
		let count = Number(counts[i])
		lex += (count / words) * weight
		let word
		for (word in obj) {
		if (!obj.hasOwnProperty(word)) continue
		if (enc === 'binary' \|\| enc == null \|\| wc == null) {
		// weight + weight + weight etc
		lex += Number(obj[word][2])
		} else {
		lex += weight
		// (frequency / wordcount) * weight
		lex += (Number(obj[word][1]) / Number(wc)) * Number(obj[word][2])
		}
		}
		// add intercept value
		lex += int
		// return final lexical value
		if (int != null) lex += Number(int)
		return lex
		}

		/**
		* Converts the lexical values object to an orientation string
		* @function getOrientation
		* @param {Object} obj lexical values object
		* @return {string} 'Past', 'Present', or 'Future'
		*/
		const getOrientation = obj => {
		@@ -174,27 +166,24 @@ const a = [obj.PAST, obj.PRESENT, obj.FUTURE]

		let ori
		let orientation = `No temporal orientation detected.`
		if (indexOfMaxValue === 0) {
		ori = 'past'
		orientation = 'Past'
		} else if (indexOfMaxValue === 1) {
		ori = 'present'
		orientation = 'Present'
		} else if (indexOfMaxValue === 2) {
		ori = 'future'
		orientation = 'Future'
		}

		let str
		if (a[indexOfMaxValue] < 0) {
		str = `No temporal orientation association detected.`
		} else {
		str = ori
		}
		return str
		return orientation
		}

		/**
		* Analyse the temporal orientation of a string
		* @function prospectimo
		* @param {string} str input string
		* @param {Object} opts options
		* @return {Object\|string} temporal orientation or lexical value based on opts
		*/
		const prospectimo = (str, opts) => {
		// make sure there is input before proceeding
		// error prevention
		if (str == null) return null
		// if str isn't a string, make it into one
		if (typeof str !== 'string') str = str.toString()
		// trim whitespace and convert to lowercase
		str = str.toLowerCase().trim()
		// default options
		@@ -205,4 +194,5 @@ if (opts == null) {
		'encoding': 'binary',
		'bigrams': true, // match bigrams?
		'trigrams': true // match trigrams?
		'threshold': -999,
		'bigrams': true,
		'trigrams': true
		}
		@@ -212,20 +202,23 @@ }
		opts.encoding = opts.encoding \|\| 'binary'
		opts.threshold = opts.threshold \|\| -999
		// convert to lowercase and trim whitespace
		str = str.toLowerCase().trim()
		// convert our string to tokens
		let tokens = tokenizer(str)
		// if no tokens return null
		if (tokens == null) return { PAST: 0, PRESENT: 0, FUTURE: 0 }
		// get wordcount
		if (tokens == null) return null
		// get wordcount before we add n-grams
		const wordcount = tokens.length
		// handle bigrams if wanted
		// handle bi-grams if wanted
		if (opts.bigrams) {
		const bigrams = getBigrams(str)
		const bigrams = getNGrams(str, 2)
		tokens = tokens.concat(bigrams)
		}
		// handle trigrams if wanted
		// handle tri-grams if wanted
		if (opts.trigrams) {
		const trigrams = getTrigrams(str)
		const trigrams = getNGrams(str, 3)
		tokens = tokens.concat(trigrams)
		}
		// get matches from array
		const matches = getMatches(tokens)
		const matches = getMatches(tokens, opts.threshold)
		// calculate lexical usage
		@@ -238,7 +231,4 @@ const enc = opts.encoding
		// predict and return
		if (opts.return === 'lex') {
		return lex
		} else {
		return getOrientation(lex)
		}
		if (opts.return === 'lex') return lex
		return getOrientation(lex)
		}
		@@ -245,0 +235,0 @@

package.json

		{
		"name": "prospectimo",
		"version": "0.2.0",
		"version": "0.3.0",
		"description": "Analyse the temporal orientation of a string.",
		@@ -5,0 +5,0 @@ "main": "index.js",

README.md

		# prospectimo

		Analyse the temporal orientation of a string.
		Get the temporal orientation of a string.

		@@ -9,12 +9,45 @@ ## Usage
		const opts = {
		'return': 'lex', // 'orientation' return string, 'lex' (default) returns object of lexical values
		"threshold": -0.2,
		"bigrams": true,
		"trigrams": true
		'return': 'lex', // 'orientation' returns a string, 'lex' (default) returns object of lexical values
		'encoding': 'binary', // 'binary' (default), or 'frequency' - type of word encoding to use.
		'threshold': -0.98, // a lexical weight threshold between 1.16 (include nothing), and -0.98 (include everything, default)
		'bigrams': true, // compare against bigrams in the lexicon?
		'trigrams': true, // compare against trigrams in the lexicon?
		}
		const text = "A big long string of text...";
		let orientation = prospectimo(text, opts);
		console.log(orientation)
		const str = "A big long string of text...";
		const orientation = prospectimo(str, opts);
		console.log(orientation);
		```

		## Options

		### 'return'

		Valid options: 'lex' (default), or 'orientation'.

		'lex' returns an object with 'PAST', 'PRESENT' and 'FUTURE' keys, each containing a lexical value for that orientation.

		'orientation' returns a string stating either 'Past', 'Present', or 'Future', or the message 'No temporal orientation detected.' when no orientation can be determined.

		### 'encoding'

		Valid options: 'binary' (default), or 'frequency'.

		'binary' calculates the lexical value as simply a sum of weights, i.e. weight[1] + weight[2] + etc...

		'frequency' calculates the lexical value as (word frequency / total wordcount) * word weight

		Unless you have a specific need for frequency encoding, we recommend you use binary only.

		### 'threshold'

		The lexicon contains weight values that are very small. You can exclude them using the threshold option.

		The smallest value in the lexicon is -0.9772179. Therefore a threshold of -0.98 will include all words in the lexicon.

		The largest value in the lexicon is 1.15807005. Therefore a threshold of 1.16 will include no words in the lexicon.

		### 'bigrams' and 'trigrams'

		The lexicon includes strings that are between one and three words in length. By default we match against these using bi-grams and tri-grams. However, you may want to disable bi-gram and tri-gram matching when analysing very long strings, to save processing time and memory.

		## Acknowledgements
		@@ -28,7 +61,7 @@

		Used under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported licence
		Used under the [Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported](http://creativecommons.org/licenses/by-nc-sa/3.0/)

		# Licence
		(C) 2017 P. Hughes
		(C) 2017 [P. Hughes](http://www.phugh.es)

		[Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported](http://creativecommons.org/licenses/by-nc-sa/3.0/)

Improved metrics

Worsened metrics