Socket
Socket
Sign inDemoInstall

prospectimo

Package Overview
Dependencies
22
Maintainers
1
Versions
14
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.2.0 to 0.3.0

204

index.js
/**
* prospectimo
* v0.2.0
* v0.3.0
*

@@ -24,7 +24,8 @@ * Analyse the temporal orientation of a string.

* 'encoding': 'binary', // 'binary' (default), or 'frequency' - type of word encoding to use.
* 'threshold': -0.38, // minimum lexical weight; only lexicon entries with a weight above this are matched
* 'bigrams': true, // compare against bigrams in the lexicon?
* 'trigrams': true, // compare against trigrams in the lexicon?
* }
* const text = "A big long string of text...";
* const orientation = prospectimo(text, opts);
* const str = "A big long string of text...";
* const orientation = prospectimo(str, opts);
* console.log(orientation)

@@ -34,3 +35,3 @@ *

* @param {Object} opts options
* @return {string|number} temporal orientation or lexical value based on opts
* @return {Object|string} temporal orientation or lexical value based on opts
*/

@@ -43,16 +44,15 @@

let tokenizer = root.tokenizer
let lexicon = root.lexicon
let natural = root.natural
let tokenizer = root.tokenizer
if (typeof tokenizer === 'undefined') {
const hasRequire = typeof require !== 'undefined'
if (hasRequire) {
tokenizer = require('happynodetokenizer')
if (typeof lexicon === 'undefined') {
if (typeof require !== 'undefined') {
lexicon = require('./data/lexicon.json')
natural = require('natural')
} else throw new Error('prospectimo required happynodetokenizer and ./data/lexicon.json')
tokenizer = require('happynodetokenizer')
} else throw new Error('prospectimo requires node modules happynodetokenizer and natural, and ./data/lexicon.json')
}
// get multiple indexes helper
// Find every index at which an element appears in an array (use .length for the number of occurrences)
Array.prototype.indexesOf = function (el) {

@@ -70,14 +70,18 @@ const idxs = []

/**
* @function getBigrams
* @param {string} str input string
* @return {Array} array of bigram strings
* Get all the n-grams of a string and return as an array
* @function getNGrams
* @param {string} str input string
* @param {number} n arbitrary n-gram number, e.g. 2 = bigrams
* @return {Array} array of ngram strings
*/
const getBigrams = str => {
const NGrams = natural.NGrams
const bigrams = NGrams.bigrams(str)
const getNGrams = (str, n) => {
// default to bi-grams on null n
if (n == null) n = 2
if (typeof n !== 'number') n = Number(n)
const ngrams = natural.NGrams.ngrams(str, n)
const len = ngrams.length
const result = []
const len = bigrams.length
let i = 0
for (i; i < len; i++) {
result.push(bigrams[i].join(' '))
result.push(ngrams[i].join(' '))
}

@@ -88,19 +92,14 @@ return result

/**
* @function getTrigrams
* @param {string} str input string
* @return {Array} array of trigram strings
* Loop through lexicon and match against array
* @function getMatches
* @param {Array} arr token array
* @param {number} threshold min. weight threshold
* @return {Object} object of matches
*/
const getTrigrams = str => {
const NGrams = natural.NGrams
const trigrams = NGrams.trigrams(str)
const result = []
const len = trigrams.length
let i = 0
for (i; i < len; i++) {
result.push(trigrams[i].join(' '))
}
return result
}
const getMatches = (arr) => {
const getMatches = (arr, threshold) => {
// error prevention
if (arr == null) return null
if (threshold == null) threshold = -999
if (typeof threshold !== 'number') threshold = Number(threshold)
// loop through categories in lexicon
const matches = {}

@@ -111,23 +110,15 @@ let category

let match = []
let key
let word
let data = lexicon[category]
for (key in data) {
if (!data.hasOwnProperty(key)) continue
if (arr.indexOf(key) > -1) {
let item
let weight = data[key]
let reps = arr.indexesOf(key).length
if (reps > 1) {
let words = []
for (let i = 0; i < reps; i++) {
words.push(key)
}
item = [words, weight]
} else {
item = [key, weight]
}
match.push(item)
// loop through words in category
for (word in data) {
if (!data.hasOwnProperty(word)) continue
let weight = data[word]
// if word from input matches word from lexicon ...
if (arr.indexOf(word) > -1 && weight > threshold) {
let count = arr.indexesOf(word).length // number of times the word appears in the input text
match.push([word, count, weight])
}
matches[category] = match
}
matches[category] = match
}

@@ -137,34 +128,35 @@ return matches

/**
* Calculate the total lexical value of matches
* @function calcLex
* @param {Object} obj matches object
* @param {number} wc wordcount
* @param {number} int intercept value
* @param {string} enc encoding
* @return {number} lexical value
*/
const calcLex = (obj, wc, int, enc) => {
const counts = []
const weights = []
let key
for (key in obj) {
if (!obj.hasOwnProperty(key)) continue
if (Array.isArray(obj[key][0])) {
counts.push(obj[key][0].length)
} else {
counts.push(1)
}
weights.push(obj[key][1])
}
if (obj == null) return null
let lex = 0
let i
const len = counts.length
const words = Number(wc)
for (i = 0; i < len; i++) {
let weight = Number(weights[i])
if (enc === 'frequency') {
let count = Number(counts[i])
lex += (count / words) * weight
let word
for (word in obj) {
if (!obj.hasOwnProperty(word)) continue
if (enc === 'binary' || enc == null || wc == null) {
// weight + weight + weight etc
lex += Number(obj[word][2])
} else {
lex += weight
// (frequency / wordcount) * weight
lex += (Number(obj[word][1]) / Number(wc)) * Number(obj[word][2])
}
}
// add intercept value
lex += int
// return final lexical value
if (int != null) lex += Number(int)
return lex
}
/**
* Converts the lexical values object to an orientation string
* @function getOrientation
* @param {Object} obj lexical values object
* @return {string} 'Past', 'Present', or 'Future'
*/
const getOrientation = obj => {

@@ -174,27 +166,24 @@ const a = [obj.PAST, obj.PRESENT, obj.FUTURE]

let ori
let orientation = `No temporal orientation detected.`
if (indexOfMaxValue === 0) {
ori = 'past'
orientation = 'Past'
} else if (indexOfMaxValue === 1) {
ori = 'present'
orientation = 'Present'
} else if (indexOfMaxValue === 2) {
ori = 'future'
orientation = 'Future'
}
let str
if (a[indexOfMaxValue] < 0) {
str = `No temporal orientation association detected.`
} else {
str = ori
}
return str
return orientation
}
/**
* Analyse the temporal orientation of a string
* @function prospectimo
* @param {string} str input string
* @param {Object} opts options
* @return {Object|string} temporal orientation or lexical value based on opts
*/
const prospectimo = (str, opts) => {
// make sure there is input before proceeding
// error prevention
if (str == null) return null
// if str isn't a string, make it into one
if (typeof str !== 'string') str = str.toString()
// trim whitespace and convert to lowercase
str = str.toLowerCase().trim()
// default options

@@ -205,4 +194,5 @@ if (opts == null) {

'encoding': 'binary',
'bigrams': true, // match bigrams?
'trigrams': true // match trigrams?
'threshold': -999,
'bigrams': true,
'trigrams': true
}

@@ -212,20 +202,23 @@ }

opts.encoding = opts.encoding || 'binary'
opts.threshold = opts.threshold || -999
// convert to lowercase and trim whitespace
str = str.toLowerCase().trim()
// convert our string to tokens
let tokens = tokenizer(str)
// if no tokens return null
if (tokens == null) return { PAST: 0, PRESENT: 0, FUTURE: 0 }
// get wordcount
if (tokens == null) return null
// get wordcount before we add n-grams
const wordcount = tokens.length
// handle bigrams if wanted
// handle bi-grams if wanted
if (opts.bigrams) {
const bigrams = getBigrams(str)
const bigrams = getNGrams(str, 2)
tokens = tokens.concat(bigrams)
}
// handle trigrams if wanted
// handle tri-grams if wanted
if (opts.trigrams) {
const trigrams = getTrigrams(str)
const trigrams = getNGrams(str, 3)
tokens = tokens.concat(trigrams)
}
// get matches from array
const matches = getMatches(tokens)
const matches = getMatches(tokens, opts.threshold)
// calculate lexical usage

@@ -238,7 +231,4 @@ const enc = opts.encoding

// predict and return
if (opts.return === 'lex') {
return lex
} else {
return getOrientation(lex)
}
if (opts.return === 'lex') return lex
return getOrientation(lex)
}

@@ -245,0 +235,0 @@

{
"name": "prospectimo",
"version": "0.2.0",
"version": "0.3.0",
"description": "Analyse the temporal orientation of a string.",

@@ -5,0 +5,0 @@ "main": "index.js",

# prospectimo
Analyse the temporal orientation of a string.
Get the temporal orientation of a string.

@@ -9,12 +9,45 @@ ## Usage

const opts = {
'return': 'lex', // 'orientation' return string, 'lex' (default) returns object of lexical values
"threshold": -0.2,
"bigrams": true,
"trigrams": true
'return': 'lex', // 'orientation' returns a string, 'lex' (default) returns object of lexical values
'encoding': 'binary', // 'binary' (default), or 'frequency' - type of word encoding to use.
'threshold': -0.98, // a lexical weight threshold between 1.16 (include nothing), and -0.98 (include everything, default)
'bigrams': true, // compare against bigrams in the lexicon?
'trigrams': true, // compare against trigrams in the lexicon?
}
const text = "A big long string of text...";
let orientation = prospectimo(text, opts);
console.log(orientation)
const str = "A big long string of text...";
const orientation = prospectimo(str, opts);
console.log(orientation);
```
## Options
### 'return'
Valid options: 'lex' (default), or 'orientation'.
'lex' returns an object with 'PAST', 'PRESENT' and 'FUTURE' keys, each containing a lexical value for that orientation.
'orientation' returns a string: 'Past', 'Present' or 'Future', or the fallback message 'No temporal orientation detected.'
### 'encoding'
Valid options: 'binary' (default), or 'frequency'.
'binary' calculates the lexical value as simply a sum of weights, i.e. weight[1] + weight[2] + etc...
'frequency' calculates the lexical value as the sum, over every matched word, of (word frequency / total wordcount) * word weight.
Unless you have a specific need for frequency encoding, we recommend you use binary only.
### 'threshold'
The lexicon contains weight values that are very small. You can exclude them using the threshold option.
The smallest value in the lexicon is -0.9772179. Therefore a threshold of -0.98 will include all words in the lexicon.
The largest value in the lexicon is 1.15807005. Therefore a threshold of 1.16 will include no words in the lexicon.
### 'bigrams' and 'trigrams'
The lexicon includes strings that are between one and three words in length. By default we match against these using bi-grams and tri-grams; however, you may want to disable bi-gram and tri-gram matching when analysing very long strings, to save processing time and memory.
## Acknowledgements

@@ -28,7 +61,7 @@

Used under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported licence
Used under the [Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported](http://creativecommons.org/licenses/by-nc-sa/3.0/)
# Licence
(C) 2017 P. Hughes
(C) 2017 [P. Hughes](http://www.phugh.es)
[Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported](http://creativecommons.org/licenses/by-nc-sa/3.0/)
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc