New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

node-rake

Package Overview
Dependencies
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-rake - npm Package Compare versions

Comparing version 0.0.9 to 1.0.0

_config.yml

34

__test__/rake.test.js

@@ -1,25 +0,27 @@

import rake from '../app'
import rake from '../app';
describe('rake', () => {
it('can be imported', () => {
expect(rake).toBeTruthy()
})
expect(rake).toBeTruthy();
});
describe('generate', () => {
const text = 'LDA stands for Latent Dirichlet Allocation. As already mentioned it is one of the more popular topic models which was initially proposed by Blei, Ng and Jordan in 2003. It is a generative model which, according to Wikipedia, allows sets of observations to be explained by unobserved groups that explain why some parts of the data are similar.';
let text = "LDA stands for Latent Dirichlet Allocation. As already mentioned it is one of the more popular topic models which was initially proposed by Blei, Ng and Jordan in 2003. It is a generative model which, according to Wikipedia, allows sets of observations to be explained by unobserved groups that explain why some parts of the data are similar."
it('extracts keywords from text', () => {
let results = rake.generate(text)
expect(results.length).toEqual(18)
})
const results = rake.generate(text);
expect(results.length).toEqual(18);
});
it('extracts keywords from text using a custom stopwords list', () => {
const opts = { stopwords: ['for', 'the', 'a', 'stands', 'test', 'man', 'woman'] };
const keywords = rake.generate(text, opts);
expect(keywords.length).toEqual(7);
});
it('trims leading and trailing spaces from keywords', () => {
let [firstKeyword, ...rest] = rake.generate(text)
expect(firstKeyword).toEqual("Latent Dirichlet Allocation")
})
})
})
const firstKeyword = rake.generate(text)[0];
expect(firstKeyword).toEqual('Latent Dirichlet Allocation');
});
});
});

@@ -1,10 +0,15 @@

var Rake = require('./index.js')
var path = require("path");
var stopwords_path = path.resolve(__dirname+'/'+'stopWords.txt')
const Rake = require('./index.js');
const path = require('path');
const fs = require('fs');
const stopwordsPath = path.resolve(`${__dirname}/stopWords.txt`);
module.exports = {
generate: function(content){
let instance = new Rake(content,stopwords_path)
return instance.generate()
}
}
generate(content, opts = {}) {
const fileData = fs.readFileSync(stopwordsPath).toString().split('\n');
const stopwordsList = opts.stopwords || fileData;
const instance = new Rake(content, stopwordsList);
return instance.generate();
},
};

@@ -1,123 +0,95 @@

var fs = require('fs');
class Rake {
constructor(text,stop_words_path){
constructor(text, stopwordsList) {
this.text = text;
this.stop_words_path = stop_words_path
this.regex_expression = this.buildRegex()
this.stopwords = stopwordsList;
this.regexExpression = this.buildRegex();
}
getStopWordsFromFile() {
var stopwords = fs.readFileSync(this.stop_words_path).toString().split("\n");
return stopwords
buildRegex() {
return this.stopwords.join('|');
}
buildRegex(){
var reg = ''
var stopwords_list = this.getStopWordsFromFile();
for(var i in stopwords_list){
var stopword = stopwords_list[i];
if(i!=stopwords_list.length-1){reg = reg + stopword + '|';}
else{reg = reg + stopword;}
}
return reg;
}
removeStopWords(sentence) {
var reg_exp = this.regex_expression
var r = reg_exp.substring(0, reg_exp.length - 1);
var reg = new RegExp('\\b(?:' + r + ')\\b','ig')
var filtered_sentence = sentence.replace(reg,'|').split('|')
return filtered_sentence
const regExp = this.regexExpression;
const r = regExp.substring(0, regExp.length - 1);
const reg = new RegExp(`\\b(?:${r})\\b`, 'ig');
const filteredSentence = sentence.replace(reg, '|').split('|');
return filteredSentence;
}
splitTextToSentences(text){
var sentences = text.match( /[^\.!\?\:\\]+/g );
var filtered_sentences = []
for(var i in sentences){
var s = sentences[i].replace(/ +/g, "");
if(s != ""){filtered_sentences.push(s)}
}
return filtered_sentences
splitTextToSentences(text) {
const sentences = text.match(/[^.!?:\\]+/g);
const filteredSentences = sentences.filter(s => s.replace(/ +/g, '') !== '');
return filteredSentences;
}
generatePhrases(sentence_list) {
var phrase_list = []
for (var s in sentence_list) {
var phrases = this.removeStopWords(sentence_list[s]);
for(var phrase in phrases) {
var phr = phrases[phrase].replace(/['!"“”’#$%&()\*+,\-\.\/:;<=>?@\[\\\]\^_`{|}~']/g,'')
if(phr != ' ' && phr != '') {
phrase_list.push(phr.trim())
}
}
}
return phrase_list
generatePhrases(sentenceList) {
const reg = /['!"“”’#$%&()*+,\-./:;<=>?@[\\\]^_`{|}~']/g;
const phrases = sentenceList.map(s => this.removeStopWords(s));
const phraseList = phrases.map(phrase => phrase
.filter(phr => (phr.replace(reg, '') !== ' ' && phr.replace(reg, '') !== ''))
.map(phr => phr.trim()),
);
const flattenedList = [].concat(...phraseList);
return flattenedList;
}
//Generates score for each word.
calculateKeywordScores(phrase_list) {
var word_freq = {}
var word_degree = {}
var word_score = {}
for(var phrase in phrase_list) {
var word_list = phrase_list[phrase].match(/[,.!?;:/‘’“”]|\b[0-9a-z']+\b/gi)
var word_list_degree = word_list.length
for(var word in word_list){
word_freq[word_list[word]] = 0;
word_freq[word_list[word]] +=1;
word_degree[word_list[word]] = 0;
word_degree[word_list[word]] += word_list_degree;
// Generates score for each word.
calculateKeywordScores(phraseList) {
const wordFreq = {};
const wordDegree = {};
const wordScore = {};
phraseList.forEach((phrase) => {
const wordList = phrase.match(/[,.!?;:/‘’“”]|\b[0-9a-z']+\b/gi);
if(wordList){
const wordListDegree = wordList.length;
wordList.forEach((word) => {
if (wordFreq[word]) {
wordFreq[word] += 1;
}
else {
wordFreq[word] = 1;
}
if (wordDegree[word]) {
wordDegree[word] += wordListDegree;
}
else {
wordDegree[word] = wordListDegree;
}
});
}
}
});
for(var i in word_freq) {
var freq = word_freq[i];
word_degree[freq] = word_degree[freq] + word_freq[freq];
}
for(var i in word_freq){
word_score[i] = 0;
word_score[i] = word_degree[i] / (word_freq[i] * 1.0);
}
return word_score
Object.values(wordFreq).forEach((freq) => { wordDegree[freq] += wordFreq[freq]; });
Object.keys(wordFreq).forEach((i) => { wordScore[i] = wordDegree[i] / (wordFreq[i] * 1.0); });
return wordScore;
}
//Generates score for each phrase based on the word scores.
calculatePhraseScores(phrase_list, word_score) {
var phrase_scores = {}
for(var p in phrase_list){
var phrase = phrase_list[p];
phrase_scores[phrase] = 0;
var word_list = phrase.match(/(\b[^\s]+\b)/g)
var candidate_score = 0;
for(var w in word_list){
var word = word_list[w];
candidate_score += word_score[word];
}
phrase_scores[phrase] = candidate_score;
}
return phrase_scores
// Generates score for each phrase based on the word scores.
calculatePhraseScores(phraseList, wordScore) {
const phraseScores = {};
phraseList.forEach((phrase) => {
phraseScores[phrase] = 0;
let candidateScore = 0;
const wordList = phrase.match(/(\b[^\s]+\b)/g);
wordList.forEach((word) => { candidateScore += wordScore[word]; });
phraseScores[phrase] = candidateScore;
});
return phraseScores;
}
sortPhrases(obj) {
var keys = []; for(var key in obj) keys.push(key);
return keys.sort(function(a,b){return obj[b]-obj[a]});
return Object.keys(obj).sort((a, b) => obj[b] - obj[a]);
}
generate() {
var sentence_list = this.splitTextToSentences(this.text);
var phrases_list = this.generatePhrases(sentence_list);
var word_scores = this.calculateKeywordScores(phrases_list)
var phrase_scores = this.calculatePhraseScores(phrases_list, word_scores)
var result = this.sortPhrases(phrase_scores)
return result
const sentenceList = this.splitTextToSentences(this.text);
const phrasesList = this.generatePhrases(sentenceList);
const wordScores = this.calculateKeywordScores(phrasesList);
const phraseScores = this.calculatePhraseScores(phrasesList, wordScores);
const result = this.sortPhrases(phraseScores);
return result;
}
}
module.exports = Rake
module.exports = Rake;
{
"name": "node-rake",
"version": "0.0.9",
"version": "1.0.0",
"description": "A NodeJS implementation of the Rapid Automatic Keyword Extraction algorithm.",

@@ -26,4 +26,7 @@ "main": "app.js",

"babel-preset-es2015": "^6.22.0",
"eslint": "^4.4.0",
"eslint-config-airbnb-base": "^11.3.1",
"eslint-plugin-import": "^2.7.0",
"jest": "^18.1.0"
}
}
# node-rake
[![npm](https://img.shields.io/npm/dm/node-rake.svg)](https://www.npmjs.com/package/node-rake) [![npm](https://img.shields.io/npm/v/node-rake.svg)]()
[![Build Status](https://travis-ci.org/waseem18/node-rake.svg?branch=master)](https://travis-ci.org/waseem18/node-rake) [![npm](https://img.shields.io/npm/dm/node-rake.svg)](https://www.npmjs.com/package/node-rake) [![npm](https://img.shields.io/npm/v/node-rake.svg)]()

@@ -13,6 +13,23 @@

```javascript
import rake from 'node-rake'
rake.generate(text, opts);
```
The `opts` parameter is an object that lets you pass custom options to the `generate` method. Options:
- `stopwords`: Optional. An `array` containing a custom stopwords list. By default, the method uses the stopwords list bundled with the package (see [Stopwords source](#stopwords-source)).
## Example of usage:
```javascript
const rake = require('node-rake')
const keywords = rake.generate("LDA stands for Latent Dirichlet Allocation")
// it'll output: [ 'Latent Dirichlet Allocation', 'LDA stands' ]
//or
let rake = require('node-rake')
let keywords = rake.generate("LDA stands for Latent Dirichlet Allocation")
const myStopwords = ['for', 'the', 'a', 'stands', 'test', 'man', 'woman'];
const opts = {stopwords: myStopwords};
const keywords = rake.generate("LDA stands for Latent Dirichlet Allocation", opts);
// it'll output: [ 'Latent Dirichlet Allocation', 'LDA' ]
```

@@ -19,0 +36,0 @@

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc