Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

retext-keywords

Package Overview
Dependencies
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

retext-keywords - npm Package Compare versions

Comparing version 4.0.1 to 4.0.2

license

288

index.js

@@ -1,50 +0,50 @@

'use strict';
'use strict'
var stemmer = require('stemmer');
var visit = require('unist-util-visit');
var nlcstToString = require('nlcst-to-string');
var pos = require('retext-pos');
var stemmer = require('stemmer')
var visit = require('unist-util-visit')
var nlcstToString = require('nlcst-to-string')
var pos = require('retext-pos')
module.exports = keywords;
module.exports = keywords
var own = {}.hasOwnProperty;
var own = {}.hasOwnProperty
function keywords(options) {
this.use(pos).use(gatherKeywords, options);
this.use(pos).use(gatherKeywords, options)
}
function gatherKeywords(options) {
var maximum = (options || {}).maximum || 5;
var maximum = (options || {}).maximum || 5
return transformer;
return transformer
function transformer(tree, file) {
var important = getImportantWords(tree);
var important = getImportantWords(tree)
file.data.keywords = filterResults(cloneMatches(important), maximum);
file.data.keyphrases = getKeyphrases(important, maximum);
file.data.keywords = filterResults(cloneMatches(important), maximum)
file.data.keyphrases = getKeyphrases(important, maximum)
}
}
/* Get following or preceding important words or white space. */
// Get following or preceding important words or white space.
function findPhraseInDirection(node, index, parent, offset) {
var children = parent.children;
var nodes = [];
var stems = [];
var words = [];
var queue = [];
var child;
var children = parent.children
var nodes = []
var stems = []
var words = []
var queue = []
var child
while (children[index += offset]) {
child = children[index];
while (children[(index += offset)]) {
child = children[index]
if (child.type === 'WhiteSpaceNode') {
queue.push(child);
queue.push(child)
} else if (isImportant(child)) {
nodes = nodes.concat(queue, [child]);
words.push(child);
stems.push(stemNode(child));
queue = [];
nodes = nodes.concat(queue, [child])
words.push(child)
stems.push(stemNode(child))
queue = []
} else {
break;
break
}

@@ -57,32 +57,32 @@ }

nodes: nodes
};
}
}
/* Get the top important phrases in `self`. */
// Get the top important phrases in `self`.
function getKeyphrases(results, maximum) {
var stemmedPhrases = {};
var initialWords = [];
var stemmedPhrase;
var index;
var length;
var otherIndex;
var keyword;
var matches;
var phrase;
var stems;
var score;
var first;
var match;
var stemmedPhrases = {}
var initialWords = []
var stemmedPhrase
var index
var length
var otherIndex
var keyword
var matches
var phrase
var stems
var score
var first
var match
/* Iterate over all grouped important words... */
// Iterate over all grouped important words...
for (keyword in results) {
matches = results[keyword].matches;
length = matches.length;
index = -1;
matches = results[keyword].matches
length = matches.length
index = -1
/* Iterate over every occurrence of a certain keyword... */
// Iterate over every occurrence of a certain keyword...
while (++index < length) {
phrase = findPhrase(matches[index]);
stemmedPhrase = stemmedPhrases[phrase.value];
first = phrase.nodes[0];
phrase = findPhrase(matches[index])
stemmedPhrase = stemmedPhrases[phrase.value]
first = phrase.nodes[0]

@@ -92,30 +92,26 @@ match = {

parent: matches[index].parent
};
}
/* If we've detected the same stemmed
* phrase somewhere. */
// If we've detected the same stemmed phrase somewhere.
if (own.call(stemmedPhrases, phrase.value)) {
/* Add weight per phrase to the score of
* the phrase. */
stemmedPhrase.score += stemmedPhrase.weight;
// Add weight per phrase to the score of the phrase.
stemmedPhrase.score += stemmedPhrase.weight
/* If this is the first time we walk over
* the phrase (exact match but containing
* another important word), add it to the
* list of matching phrases. */
// If this is the first time we walk over the phrase (exact match but
// containing another important word), add it to the list of matching
// phrases.
if (initialWords.indexOf(first) === -1) {
initialWords.push(first);
stemmedPhrase.matches.push(match);
initialWords.push(first)
stemmedPhrase.matches.push(match)
}
} else {
otherIndex = -1;
score = -1;
stems = phrase.stems;
otherIndex = -1
score = -1
stems = phrase.stems
initialWords.push(first);
initialWords.push(first)
/* For every stem in phrase, add its
* score to score. */
// For every stem in phrase, add its score to score.
while (stems[++otherIndex]) {
score += results[stems[otherIndex]].score;
score += results[stems[otherIndex]].score
}

@@ -129,3 +125,3 @@

matches: [match]
};
}
}

@@ -136,79 +132,80 @@ }

for (stemmedPhrase in stemmedPhrases) {
phrase = stemmedPhrases[stemmedPhrase];
phrase = stemmedPhrases[stemmedPhrase]
/* Modify its score to be the rounded result of
* multiplying it with the number of occurrences,
* and dividing it by the amount of words in the
* phrase. */
// Modify its score to be the rounded result of multiplying it with the
// number of occurrences, and dividing it by the amount of words in the
// phrase.
phrase.score = Math.round(
phrase.score * phrase.matches.length / phrase.stems.length
);
(phrase.score * phrase.matches.length) / phrase.stems.length
)
}
return filterResults(stemmedPhrases, maximum);
return filterResults(stemmedPhrases, maximum)
}
/* Get the top results from an occurrence map. */
// Get the top results from an occurrence map.
function filterResults(results, maximum) {
var filteredResults = [];
var indices = [];
var matrix = {};
var column;
var key;
var score;
var interpolated;
var index;
var otherIndex;
var maxScore;
var filteredResults = []
var indices = []
var matrix = {}
var column
var key
var score
var interpolated
var index
var otherIndex
var maxScore
for (key in results) {
score = results[key].score;
score = results[key].score
if (!matrix[score]) {
matrix[score] = [];
indices.push(score);
matrix[score] = []
indices.push(score)
}
matrix[score].push(results[key]);
matrix[score].push(results[key])
}
indices.sort(reverse);
indices.sort(reverse)
maxScore = indices[0];
maxScore = indices[0]
index = -1;
index = -1
while (indices[++index]) {
score = indices[index];
column = matrix[score];
score = indices[index]
column = matrix[score]
interpolated = score / maxScore;
otherIndex = -1;
interpolated = score / maxScore
otherIndex = -1
while (column[++otherIndex]) {
column[otherIndex].score = interpolated;
column[otherIndex].score = interpolated
}
filteredResults = filteredResults.concat(column);
filteredResults = filteredResults.concat(column)
if (filteredResults.length >= maximum) {
break;
break
}
}
return filteredResults;
return filteredResults
}
/* Merge a previous array, with a current value, and
* a following array. */
// Merge a previous array, with a current value, and a following array.
function merge(prev, current, next) {
return prev.concat().reverse().concat([current], next);
return prev
.concat()
.reverse()
.concat([current], next)
}
/* Find the phrase surrounding a node. */
// Find the phrase surrounding a node.
function findPhrase(match) {
var node = match.node;
var prev = findPhraseInDirection(node, match.index, match.parent, -1);
var next = findPhraseInDirection(node, match.index, match.parent, 1);
var stems = merge(prev.stems, stemNode(node), next.stems);
var node = match.node
var prev = findPhraseInDirection(node, match.index, match.parent, -1)
var next = findPhraseInDirection(node, match.index, match.parent, 1)
var stems = merge(prev.stems, stemNode(node), next.stems)

@@ -219,19 +216,19 @@ return {

nodes: merge(prev.nodes, node, next.nodes)
};
}
}
/* Get most important words in `node`. */
// Get most important words in `node`.
function getImportantWords(node) {
var words = {};
var words = {}
visit(node, 'WordNode', visitor);
visit(node, 'WordNode', visitor)
return words;
return words
function visitor(word, index, parent) {
var match;
var stem;
var match
var stem
if (isImportant(word)) {
stem = stemNode(word);
stem = stemNode(word)
match = {

@@ -241,3 +238,3 @@ node: word,

parent: parent
};
}

@@ -249,6 +246,6 @@ if (!own.call(words, stem)) {

score: 1
};
}
} else {
words[stem].matches.push(match);
words[stem].score++;
words[stem].matches.push(match)
words[stem].score++
}

@@ -259,11 +256,10 @@ }

/* Clone the given map of words.
* This is a two level-deep clone. */
// Clone the given map of words. This is a two level-deep clone.
function cloneMatches(words) {
var result = {};
var key;
var match;
var result = {}
var key
var match
for (key in words) {
match = words[key];
match = words[key]

@@ -274,9 +270,9 @@ result[key] = {

score: match.score
};
}
}
return result;
return result
}
/* Check if `node` is important. */
// Check if `node` is important.
function isImportant(node) {

@@ -287,25 +283,21 @@ return (

node.data.partOfSpeech &&
(
node.data.partOfSpeech.indexOf('N') === 0 ||
(
node.data.partOfSpeech === 'JJ' &&
isUpperCase(nlcstToString(node).charAt(0))
)
)
);
(node.data.partOfSpeech.indexOf('N') === 0 ||
(node.data.partOfSpeech === 'JJ' &&
isUpperCase(nlcstToString(node).charAt(0))))
)
}
/* Check if `value` is upper-case. */
// Check if `value` is upper-case.
function isUpperCase(value) {
return value === String(value).toUpperCase();
return value === String(value).toUpperCase()
}
/* Reverse sort: from 9 to 0. */
// Reverse sort: from 9 to 0.
function reverse(a, b) {
return b - a;
return b - a
}
/* Get the stem of a node. */
// Get the stem of a node.
function stemNode(node) {
return stemmer(nlcstToString(node)).toLowerCase();
return stemmer(nlcstToString(node)).toLowerCase()
}
{
"name": "retext-keywords",
"version": "4.0.1",
"version": "4.0.2",
"description": "Keyword extraction with Retext",

@@ -14,7 +14,7 @@ "license": "MIT",

],
"repository": "wooorm/retext-keywords",
"bugs": "https://github.com/wooorm/retext-keywords/issues",
"author": "Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)",
"repository": "retextjs/retext-keywords",
"bugs": "https://github.com/retextjs/retext-keywords/issues",
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)",
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"Vladimir Starkov <iamstarkov@gmail.com>"

@@ -32,20 +32,20 @@ ],

"devDependencies": {
"browserify": "^14.1.0",
"esmangle": "^1.0.0",
"nyc": "^11.0.0",
"remark-cli": "^4.0.0",
"remark-preset-wooorm": "^3.0.0",
"browserify": "^16.0.0",
"nyc": "^13.0.0",
"prettier": "^1.14.3",
"remark-cli": "^6.0.0",
"remark-preset-wooorm": "^4.0.0",
"retext": "^5.0.0",
"tape": "^4.0.0",
"xo": "^0.18.0"
"tinyify": "^2.4.3",
"xo": "^0.23.0"
},
"scripts": {
"build-md": "remark . -qfo",
"build-bundle": "browserify index.js --ignore-missing --bare -s retextKeywords > retext-keywords.js",
"build-mangle": "esmangle retext-keywords.js > retext-keywords.min.js",
"build": "npm run build-md && npm run build-bundle && npm run build-mangle",
"lint": "xo",
"format": "remark . -qfo && prettier --write \"**/*.js\" && xo --fix",
"build-bundle": "browserify . -s retextKeywords > retext-keywords.js",
"build-mangle": "browserify . -s retextKeywords -p tinyify > retext-keywords.min.js",
"build": "npm run build-bundle && npm run build-mangle",
"test-api": "node test",
"test-coverage": "nyc --reporter lcov tape test.js",
"test": "npm run build && npm run lint && npm run test-coverage"
"test": "npm run format && npm run build && npm run test-coverage"
},

@@ -58,4 +58,12 @@ "nyc": {

},
"prettier": {
"tabWidth": 2,
"useTabs": false,
"singleQuote": true,
"bracketSpacing": false,
"semi": false,
"trailingComma": "none"
},
"xo": {
"space": true,
"prettier": true,
"esnext": false,

@@ -62,0 +70,0 @@ "rules": {

@@ -1,2 +0,2 @@

# retext-keywords [![Build Status][travis-badge]][travis] [![Coverage Status][codecov-badge]][codecov]
# retext-keywords [![Build][build-badge]][build] [![Coverage][coverage-badge]][coverage] [![Downloads][downloads-badge]][downloads] [![Chat][chat-badge]][chat]

@@ -15,3 +15,3 @@ Keyword extraction with [**retext**][retext].

Say we have the following file, `example.txt`, with the first three paragraphs
Say we have the following file, `example.txt`, with the first four paragraphs
on [Term Extraction][term-extraction] from Wikipedia:

@@ -32,24 +32,28 @@

```javascript
var vfile = require('to-vfile');
var retext = require('retext');
var keywords = require('retext-keywords');
var nlcstToString = require('nlcst-to-string');
var vfile = require('to-vfile')
var retext = require('retext')
var keywords = require('retext-keywords')
var toString = require('nlcst-to-string')
retext()
.use(keywords)
.process(vfile.readSync('example.txt'), function (err, file) {
if (err) throw err;
.process(vfile.readSync('example.txt'), done)
console.log('Keywords:');
file.data.keywords.forEach(function (keyword) {
console.log(nlcstToString(keyword.matches[0].node));
});
function done(err, file) {
if (err) throw err
console.log();
console.log('Key-phrases:');
file.data.keyphrases.forEach(function (phrase) {
console.log(phrase.matches[0].nodes.map(nlcstToString).join(''));
});
}
);
console.log('Keywords:')
file.data.keywords.forEach(function(keyword) {
console.log(toString(keyword.matches[0].node))
})
console.log()
console.log('Key-phrases:')
file.data.keyphrases.forEach(function(phrase) {
console.log(phrase.matches[0].nodes.map(stringify).join(''))
function stringify(value) {
return toString(value)
}
})
}
```

@@ -122,2 +126,10 @@

## Contribute
See [`contributing.md` in `retextjs/retext`][contributing] for ways to get
started.
This organisation has a [Code of Conduct][coc]. By interacting with this
repository, organisation, or community you agree to abide by its terms.
## License

@@ -129,18 +141,30 @@

[travis-badge]: https://img.shields.io/travis/wooorm/retext-keywords.svg
[build-badge]: https://img.shields.io/travis/retextjs/retext-keywords.svg
[travis]: https://travis-ci.org/wooorm/retext-keywords
[build]: https://travis-ci.org/retextjs/retext-keywords
[codecov-badge]: https://img.shields.io/codecov/c/github/wooorm/retext-keywords.svg
[coverage-badge]: https://img.shields.io/codecov/c/github/retextjs/retext-keywords.svg
[codecov]: https://codecov.io/github/wooorm/retext-keywords
[coverage]: https://codecov.io/github/retextjs/retext-keywords
[downloads-badge]: https://img.shields.io/npm/dm/retext-keywords.svg
[downloads]: https://www.npmjs.com/package/retext-keywords
[chat-badge]: https://img.shields.io/badge/join%20the%20community-on%20spectrum-7b16ff.svg
[chat]: https://spectrum.chat/unified/retext
[npm]: https://docs.npmjs.com/cli/install
[license]: LICENSE
[license]: license
[author]: http://wooorm.com
[author]: https://wooorm.com
[retext]: https://github.com/wooorm/retext
[retext]: https://github.com/retextjs/retext
[term-extraction]: http://en.wikipedia.org/wiki/Terminology_extraction
[term-extraction]: https://en.wikipedia.org/wiki/Terminology_extraction
[contributing]: https://github.com/retextjs/retext/blob/master/contributing.md
[coc]: https://github.com/retextjs/retext/blob/master/code-of-conduct.md
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc