@@ -0,102 +1,56 @@
		/**
		* @author Titus Wormer
		* @copyright 2014-2015 Titus Wormer
		* @license MIT
		* @module retext:keywords
		* @fileoverview Keyword extraction with Retext.
		*/

		'use strict';

		/*
		* Module dependencies.
		* Dependencies.
		*/

		var pos,
		stemmer,
		visit;
		var stemmer = require('stemmer');
		var visit = require('unist-util-visit');
		var nlcstToString = require('nlcst-to-string');
		var pos = require('retext-pos');

		pos = require('retext-pos');
		stemmer = require('retext-porter-stemmer');
		visit = require('retext-visit');

		/*
		* Constants.
		/**
		* Get the stem of a node.
		*
		* @param {Node} node - Node to stem.
		* @return {string} - Stemmed node.
		*/
		function stemNode(node) {
			/* Serialize the node to plain text; the stemmer is given that string. */
			var text = nlcstToString(node);
			var stem = stemmer(text);

			/* The stem is always returned lower-cased. */
			return stem.toLowerCase();
		}

		/*
		 * Cached reference to `Object.prototype.hasOwnProperty`, invoked
		 * elsewhere in this file as `has.call(object, key)`.
		 */
		var has;

		has = Object.prototype.hasOwnProperty;

		/**
		* Reverse sort: from 9 to 0.
		* Check whether `value` is upper-case.
		*
		* @param {number} a
		* @param {number} b
		* @param {string} value - Value to check.
		* @return {boolean} - Whether `value` is upper-case.
		*/
		function reverseSort(a, b) {
		return b - a;
		/**
		 * Check whether `value` is upper-case.
		 *
		 * Values without cased characters (digits, punctuation, the empty
		 * string) are left unchanged by `toUpperCase()`, so comparing against
		 * the upper-cased form alone would report them as upper-case. They are
		 * therefore also required to differ from their lower-cased form.
		 *
		 * @param {string} value - Value to check.
		 * @return {boolean} - Whether `value` is upper-case.
		 */
		function isUpperCase(value) {
			var string = String(value);

			return value === string.toUpperCase() && value !== string.toLowerCase();
		}

		/**
		* Get the top results from an occurrence map.
		* Reverse sort: from 9 to 0.
		*
		* @param {Object.<string, Object>} results - Dictionary of
		* stems mapping to objects containing `nodes`, `stem`,
		* and `score` properties.
		* @param {number} minimum - Minimum number of results to
		* return.
		* @return {Array.<Object>}
		* @param {number} a - First.
		* @param {number} b - Second.
		* @return {number} - Difference.
		*/
		function filterResults(results, minimum) {
		var filteredResults,
		matrix,
		indices,
		column,
		key,
		score,
		interpolatedScore,
		index,
		otherIndex,
		maxScore;

		filteredResults = [];
		indices = [];
		matrix = {};

		for (key in results) {
		score = results[key].score;

		if (!has.call(matrix, score)) {
		matrix[score] = [];
		indices.push(score);
		}

		matrix[score].push(results[key]);
		}

		indices.sort(reverseSort);

		maxScore = indices[0];

		index = -1;

		while (indices[++index]) {
		score = indices[index];
		column = matrix[score];

		interpolatedScore = score / maxScore;
		otherIndex = -1;

		while (column[++otherIndex]) {
		column[otherIndex].score = interpolatedScore;
		}

		filteredResults = filteredResults.concat(column);

		if (filteredResults.length >= minimum) {
		break;
		}
		}

		return filteredResults;
		function reverse(a, b) {
			/* Positive when `b` is larger, so bigger numbers sort first. */
			var difference = b - a;

			return difference;
		}

		/**
		* Get whether or not a `node` is important.
		* Check whether or not a `node` is important.
		*
		* @param {Node} node
		* @return {boolean}
		* @param {Node} node - Node to check.
		* @return {boolean} - Whether `node` is important.
		*/
		@@ -106,3 +60,4 @@ function isImportant(node) {
		node &&
		node.type === 'WordNode' &&
		node.data &&
		node.data.partOfSpeech &&
		(
		@@ -112,3 +67,3 @@ node.data.partOfSpeech.indexOf('N') === 0 \|\|
		node.data.partOfSpeech === 'JJ' &&
		node.toString().charAt(0).match(/[A-Z]/)
		isUpperCase(nlcstToString(node).charAt(0))
		)
		@@ -122,19 +77,23 @@ )
		*
		* @param {Node} node
		* @param {Node} node - Parent to search in.
		* @return {Array.<Object>}
		*/
		function getImportantWords(node) {
		var importantWords;
		var words = {};

		importantWords = {};

		node.visit(node.WORD_NODE, function (word) {
		visit(node, 'WordNode', function (word, index, parent) {
		var match;
		var stem;

		if (isImportant(word)) {
		stem = word.data.stem.toLowerCase();
		stem = stemNode(word);
		match = {
		'node': word,
		'index': index,
		'parent': parent
		};

		if (!has.call(importantWords, stem)) {
		importantWords[stem] = {
		'nodes': [word],
		if (!words[stem]) {
		words[stem] = {
		'matches': [match],
		'stem': stem,
		@@ -144,4 +103,4 @@ 'score': 1
		} else {
		importantWords[stem].nodes.push(word);
		importantWords[stem].score++;
		words[stem].matches.push(match);
		words[stem].score++;
		}
		@@ -151,19 +110,63 @@ }

		return importantWords;
		return words;
		}

		/**
		* Get the top important words in `self`.
		* Get the top results from an occurrence map.
		*
		* @param {Object?} options
		* @param {number?} options.minimum
		* @this {Node} node
		* @param {Object.<string, Object>} results - Map of stems
		* mapping to objects containing `nodes`, `stem`, and
		* `score` properties.
		* @param {number} maximum - Try to get at least `maximum`
		* results.
		* @return {Array.<Object>}
		*/
		function getKeywords(options) {
		var minimum;
		/**
		 * Get the top results from a map of scored entries.
		 *
		 * Entries are grouped by score and collected from the highest score
		 * down. A whole score group is always added at once, so more than
		 * `maximum` results may be returned. Every collected entry has its
		 * `score` overwritten with its score relative to the highest one
		 * (`score / maxScore`).
		 *
		 * @param {Object.<string, Object>} results - Map of keys to objects
		 *   with a numeric `score` property. Collected objects are modified
		 *   in place.
		 * @param {number} maximum - Stop adding score groups once at least
		 *   this many results were collected.
		 * @return {Array.<Object>} - Collected entries, highest score first.
		 */
		function filterResults(results, maximum) {
			var filteredResults = [];
			var indices = [];
			var matrix = {};
			var column;
			var key;
			var score;
			var interpolated;
			var index;
			var otherIndex;
			var maxScore;

			/* Group the entries by score, remembering each distinct score once. */
			for (key in results) {
				score = results[key].score;

				if (!matrix[score]) {
					matrix[score] = [];
					indices.push(score);
				}

				matrix[score].push(results[key]);
			}

			/* Highest score first. */
			indices.sort(reverse);

			maxScore = indices[0];

			index = -1;

			while (indices[++index]) {
				score = indices[index];
				column = matrix[score];

				/* Normalize against the best score, so the top group gets `1`. */
				interpolated = score / maxScore;
				otherIndex = -1;

				while (column[++otherIndex]) {
					column[otherIndex].score = interpolated;
				}

				filteredResults = filteredResults.concat(column);

				/* Only checked after a full group was added. */
				if (filteredResults.length >= maximum) {
					break;
				}
			}

			return filteredResults;
		}
		@@ -174,26 +177,26 @@
		*
		* @param {Node} node
		* @param {string} direction - either "prev" or "next".
		* @param {Node} node - Node to start search at.
		* @param {number} index - Position of `node` in `parent`.
		* @param {Node} parent - Parent of `node`.
		* @param {number} offset - Offset to the next node. `-1`
		* when iterating backwards, `1` when iterating forwards.
		* @return {Object}
		*/
		function findPhraseInDirection(node, direction) {
		var nodes,
		stems,
		words,
		queue;
		function findPhraseInDirection(node, index, parent, offset) {
		var children = parent.children;
		var nodes = [];
		var stems = [];
		var words = [];
		var queue = [];
		var child;

		nodes = [];
		stems = [];
		words = [];
		queue = [];
		while (children[index += offset]) {
		child = children[index];

		node = node[direction];

		while (node) {
		if (node.type === node.WHITE_SPACE_NODE) {
		queue.push(node);
		} else if (isImportant(node)) {
		nodes = nodes.concat(queue, [node]);
		words.push(node);
		stems.push(node.data.stem.toLowerCase());
		if (child.type === 'WhiteSpaceNode') {
		queue.push(child);
		} else if (isImportant(child)) {
		nodes = nodes.concat(queue, [child]);
		words.push(child);
		stems.push(stemNode(child));
		queue = [];
		@@ -203,4 +206,2 @@ } else {
		}

		node = node[direction];
		}
		@@ -219,9 +220,9 @@
		*
		* @param {Array.<*>} prev
		* @param {*} current
		* @param {Array.<*>} next
		* @param {Array.<*>} prev - Reversed array before `current`.
		* @param {*} current - Current thing.
		* @param {Array.<*>} next - Things after `current`.
		* @return {Array.<*>}
		*/
		/**
		 * Merge the things before `current`, `current` itself, and the things
		 * after it into one new array.
		 *
		 * @param {Array.<*>} prev - Reversed array before `current`. Left
		 *   untouched.
		 * @param {*} current - Current thing.
		 * @param {Array.<*>} next - Things after `current`.
		 * @return {Array.<*>} - New array: `prev` in reverse order, then
		 *   `current`, then `next`.
		 */
		function merge(prev, current, next) {
			/*
			 * `reverse()` works in place, so reverse a copy (`concat()` without
			 * arguments) instead of the caller's array.
			 */
			return prev.concat().reverse().concat([current], next);
		}
		@@ -232,13 +233,14 @@
		*
		* @param {Node} node
		* @param {Object} match - Single match.
		* @return {Object}
		*/
		function findPhrase(node) {
		var prev = findPhraseInDirection(node, 'prev'),
		next = findPhraseInDirection(node, 'next'),
		stems = merge(prev.stems, node.data.stem.toLowerCase(), next.stems);
		function findPhrase(match) {
		var node = match.node;
		var prev = findPhraseInDirection(node, match.index, match.parent, -1);
		var next = findPhraseInDirection(node, match.index, match.parent, 1);
		var stems = merge(prev.stems, stemNode(node), next.stems);

		return {
		'stems': stems,
		'value': stems.join(' ').toLowerCase(),
		'value': stems.join(' '),
		'nodes': merge(prev.nodes, node, next.nodes)
		@@ -251,28 +253,24 @@ };
		*
		* @param {Object?} options
		* @param {number?} options.minimum
		* @this {Node} node
		* @param {Object.<string, Object>} results - Map of stems
		* mapping to objects containing `nodes`, `stem`, and
		* `score` properties.
		* @param {number} maximum - Try to get at least `maximum`
		* results.
		* @return {Array.<Object>}
		*/
		function getKeyphrases(options) {
		var stemmedPhrases,
		initialWords,
		stemmedPhrase,
		index,
		otherIndex,
		importantWords,
		keyword,
		nodes,
		phrase,
		stems,
		minimum,
		score;
		function getKeyphrases(results, maximum) {
		var stemmedPhrases = {};
		var initialWords = [];
		var stemmedPhrase;
		var index;
		var length;
		var otherIndex;
		var keyword;
		var matches;
		var phrase;
		var stems;
		var score;
		var first;
		var match;

		stemmedPhrases = {};
		initialWords = [];

		minimum = options && has.call(options, 'minimum') ? options.minimum : 5;

		importantWords = getImportantWords(this);

		/*
		@@ -282,5 +280,5 @@ * Iterate over all grouped important words...

		for (keyword in importantWords) {
		nodes = importantWords[keyword].nodes;

		for (keyword in results) {
		matches = results[keyword].matches;
		length = matches.length;
		index = -1;
		@@ -292,5 +290,12 @@

		while (nodes[++index]) {
		phrase = findPhrase(nodes[index]);
		while (++index < length) {
		phrase = findPhrase(matches[index]);
		stemmedPhrase = stemmedPhrases[phrase.value];
		first = phrase.nodes[0];

		match = {
		'nodes': phrase.nodes,
		'parent': matches[index].parent
		};

		/*
		@@ -301,5 +306,3 @@ * If we've detected the same stemmed

		if (has.call(stemmedPhrases, phrase.value)) {
		stemmedPhrase = stemmedPhrases[phrase.value];

		if (stemmedPhrase) {
		/*
		@@ -319,5 +322,5 @@ * Add weight per phrase to the score of

		if (initialWords.indexOf(phrase.nodes[0]) === -1) {
		initialWords.push(phrase.nodes[0]);
		stemmedPhrase.nodes.push(phrase.nodes);
		if (initialWords.indexOf(first) === -1) {
		initialWords.push(first);
		stemmedPhrase.matches.push(match);
		}
		@@ -329,3 +332,3 @@ } else {

		initialWords.push(phrase.nodes[0]);
		initialWords.push(first);

		@@ -338,3 +341,3 @@ /*
		while (stems[++otherIndex]) {
		score += importantWords[stems[otherIndex]].score;
		score += results[stems[otherIndex]].score;
		}
		@@ -347,3 +350,3 @@
		'value': phrase.value,
		'nodes': [phrase.nodes]
		'matches': [match]
		};
		@@ -365,39 +368,69 @@ }
		phrase.score = Math.round(
		phrase.score * phrase.nodes.length / phrase.stems.length
		phrase.score * phrase.matches.length / phrase.stems.length
		);
		}

		return filterResults(stemmedPhrases, minimum);
		return filterResults(stemmedPhrases, maximum);
		}

		/**
		* Define `keywords`.
		* Clone the given map of words.
		*
		* @param {Retext} retext
		* This is a two level-deep clone.
		*
		* @param {Object} words - Important words.
		* @return {Object} - Cloned words.
		*/
		function keywords(retext) {
		var TextOM,
		parentPrototype,
		elementPrototype;
		/**
		 * Clone the given map of words.
		 *
		 * This is a two level-deep clone: the returned map and each of its
		 * entries are new objects, while the `matches` arrays are shared with
		 * the input.
		 *
		 * @param {Object} words - Important words: a map of keys to objects
		 *   with `matches`, `stem`, and `score` properties.
		 * @return {Object} - Cloned words.
		 */
		function cloneMatches(words) {
			var result = {};
			var key;
			var match;

			for (key in words) {
				match = words[key];

				result[key] = {
					'matches': match.matches,
					'stem': match.stem,
					'score': match.score
				};
			}

			return result;
		}

		parentPrototype.keywords = getKeywords;
		elementPrototype.keywords = getKeywords;
		/**
		* Attach.
		*
		* @param {Retext} retext - Instance.
		* @param {Object?} [options] - Configuration.
		* @param {number?} [options.maximum] - Try to get at
		* least `maximum` results.
		* @return {Function} - `transformer`.
		*/
		function attacher(retext, options) {
			/* A missing or falsy `maximum` (including `0`) falls back to 5. */
			var maximum = (options || {}).maximum || 5;

			/* Register the `pos` plugin on the given instance. */
			retext.use(pos);

			/**
			 * Attach keywords in `cst` to `file`.
			 *
			 * Stores the results as `keywords` and `keyphrases` on the
			 * `retext` namespace of `file`.
			 *
			 * @param {NLCSTNode} cst - Node.
			 * @param {VFile} file - Virtual file.
			 */
			function transformer(cst, file) {
				var space = file.namespace('retext');
				var important = getImportantWords(cst);

				/*
				 * Keywords are filtered on a clone: `filterResults` overwrites
				 * the `score` of the entries it collects, and `getKeyphrases`
				 * is given the same map of important words afterwards.
				 */
				space.keywords = filterResults(cloneMatches(important), maximum);
				space.keyphrases = getKeyphrases(important, maximum);
			}

			return transformer;
		}

		/*
		* Expose `keywords`.
		* Expose.
		*/

		module.exports = keywords;
		module.exports = attacher;

package.json

		{
		"name": "retext-keywords",
		"version": "0.2.1",
		"version": "1.0.0",
		"description": "Keyword extraction with Retext",
		@@ -15,6 +15,10 @@ "license": "MIT",
		"dependencies": {
		"retext-porter-stemmer": "^0.2.2",
		"retext-pos": "^0.2.1",
		"retext-visit": "^0.2.2"
		"nlcst-to-string": "^0.1.5",
		"retext-pos": "^1.0.0",
		"stemmer": "^0.1.4",
		"unist-util-visit": "^1.0.0"
		},
		"files": [
		"index.js"
		],
		"repository": {
		@@ -26,24 +30,32 @@ "type": "git",
		"devDependencies": {
		"eslint": "^0.12.0",
		"browserify": "^11.0.1",
		"chalk": "^1.0.0",
		"eslint": "^1.0.0",
		"esmangle": "^1.0.1",
		"istanbul": "^0.3.0",
		"jscs": "^1.0.0",
		"jscs-jsdoc": "^0.4.0",
		"matcha": "^0.6.0",
		"jscs": "^2.0.0",
		"jscs-jsdoc": "^1.0.0",
		"mdast": "^0.28.0",
		"mdast-comment-config": "^0.1.2",
		"mdast-github": "^0.3.2",
		"mdast-lint": "^0.4.2",
		"mdast-slug": "^0.1.1",
		"mdast-validate-links": "^0.3.1",
		"mocha": "^2.0.0",
		"retext": "^0.5.0"
		"retext": "^1.0.0-rc.2"
		},
		"scripts": {
		"test-api": "_mocha --check-leaks test.js",
		"test-coveralls": "istanbul cover _mocha --report lcovonly -- --check-leaks test.js",
		"test-coverage": "istanbul cover _mocha -- --check-leaks test.js",
		"test-travis": "npm run test-coveralls",
		"test-api": "mocha --check-leaks test.js",
		"test-coverage": "istanbul cover _mocha -- test.js",
		"test-travis": "npm run test-coverage",
		"test": "npm run test-api",
		"lint-api": "eslint index.js",
		"lint-benchmark": "eslint --global bench,before,suite,set benchmark.js",
		"lint-test": "eslint --env mocha test.js",
		"lint-style": "jscs --reporter inline index.js benchmark.js test.js",
		"lint": "npm run lint-api && npm run lint-benchmark && npm run lint-test && npm run lint-style",
		"lint-api": "eslint .",
		"lint-style": "jscs --reporter inline .",
		"lint": "npm run lint-api && npm run lint-style",
		"make": "npm run lint && npm run test-coverage",
		"benchmark": "matcha benchmark.js"
		"build-bundle": "browserify index.js --ignore-missing --no-builtins --standalone retextKeywords > retext-keywords.js",
		"postbuild-bundle": "esmangle retext-keywords.js > retext-keywords.min.js",
		"build-md": "mdast . --quiet",
		"build": "npm run build-bundle && npm run build-md"
		}
		}

Readme.md

retext-keywords - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes