🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more →

Book a Demo Install Sign in

sum

Package Overview

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

sum - npm Package Compare versions

Comparing version

0.1.1

0.1.2

.travis.yml

.vagrant/machines/default/virtualbox/action_provision

.vagrant/machines/default/virtualbox/action_set_name

.vagrant/machines/default/virtualbox/id

.vagrant/machines/default/virtualbox/index_uuid

.vagrant/machines/default/virtualbox/synced_folders

CHANGELOG.md

vagrant_bootstrap.sh

Vagrantfile

package.json

		{
		"author": "alex.topliceanu <alext@vibetrace.com> (https://github.com/topliceanu)",
		"name": "sum",
		"description": "text summarization utility",
		"version": "0.1.1",
		"homepage": "https://github.com/topliceanu/text-summarization",
		"repository": {
		"type": "git",
		"url": "git@github.com:topliceanu/text-summarization.git"
		},
		"main": "./sum.js",
		"engines": {
		"node": ">0.4.12"
		},
		"dependencies": {
		"underscore": "~1.3.1",
		"underscore.string": "~2.0.0",
		"porter-stemmer": "~0.9.1",
		"vows": "0.6.1"
		}
		"name": "sum",
		"version": "0.1.2",
		"description": "text summarization utility",
		"homepage": "https://github.com/topliceanu/text-summarization",
		"license": "MIT",
		"keywords": [
		"summarization",
		"nlp",
		"stemmer",
		"stop-words",
		"express"
		],
		"author": "alex.topliceanu <alexandru.topliceanu@gmail.com> (https://github.com/topliceanu)",
		"repository": {
		"type": "git",
		"url": "git://github.com:topliceanu/text-summarization.git"
		},
		"bugs": {
		"url": "https://github.com/topliceanu/text-summarization/issues"
		},
		"main": "./sum.js",
		"scripts": {
		"test": "./node_modules/.bin/vows --spec --isolate ./tests/node/sum.js",
		"lint": "./node_modules/.bin/jshint sum.js"
		},
		"dependencies": {
		"underscore": "1.7.0",
		"underscore.string": "3.0.3",
		"porter-stemmer": "0.9.1"
		},
		"devDependencies": {
		"vows": "0.8.1",
		"jshint": "2.6.0"
		},
		"optionalDependencies": {},
		"engines": {
		"node": ">0.10.0"
		}
		}

144

README.md

		@@ -1,80 +0,96 @@
		_____ _
		/ ____\| (_)
		\| (___ _ _ _ __ ___ _ ___
		_____ _
		/ ____\| (_)
		\| (___ _ _ _ __ ___ _ ___
		\___ \ \| \| \| \|\| '_ ` _ \ \| \|/ __\|
		____) \|\| \|_\| \|\| \| \| \| \| \| _ \| \|\__ \
		\|_____/ \__,_\|\|_\| \|_\| \|_\|(_)\| \|\|___/
		_/ \|
		\|__/
		_/ \|
		\|__/

		Sum.js
		============
		## Sum.js

		[![NPM](https://nodei.co/npm/sum.png?downloads=true&stars=true)](https://nodei.co/npm/sum/)

		[![NPM](https://nodei.co/npm-dl/sum.png?months=12)](https://nodei.co/npm-dl/sum/)

		\| Indicator \| \|
		\|:-----------------------\|:-------------------------------------------------------------------------\|
		\| continuous integration \| [![Build Status](https://travis-ci.org/topliceanu/sum.svg?branch=master)](https://travis-ci.org/topliceanu/sum) \|
		\| dependency management \| [![Dependency Status](https://david-dm.org/topliceanu/sum.svg?style=flat)](https://david-dm.org/topliceanu/sum) [![devDependency Status](https://david-dm.org/topliceanu/sum/dev-status.svg?style=flat)](https://david-dm.org/topliceanu/sum#info=devDependencies) \|
		\| change log \| [CHANGELOG](https://github.com/topliceanu/sum/blob/master/CHANGELOG.md) [Releases](https://github.com/topliceanu/sum/releases) \|

		A simple function for summarizing text e.g. for automatically determining the sentences that are most relevant to the context of the corpus.
		This library depends on the [underscore](http://documentcloud.github.com/underscore/), [underscore.string](http://epeli.github.com/underscore.string/) and [porter-stemmer](https://github.com/jedp/porter-stemmer) for the moment
		This library depends on the [underscore](http://documentcloud.github.com/underscore/), [underscore.string](http://epeli.github.com/underscore.string/) and [porter-stemmer](https://github.com/jedp/porter-stemmer).

		Install in node.js
		==================
		sudo npm install -g sum
		## Install in node.js

		Install in browser
		==================
		<script src="/lib/underscore.js"></script>
		<script src="/lib/underscore.string.js"></script>
		<script src="/lib/porter-stemmer.js"></script>
		<script src="/sum.browser.js"></script>

		```bash
		sudo npm install -g sum
		```

		Quick Start
		===========
		var sum = require( 'sum' );
		var bigString = "....";
		var abstract = sum({ 'corpus': bigString });

		## Install in browser

		Further Options
		===============
		var sum = require( 'sum' );
		var anotherBigString = "...";
		var abstract = sum({
		/**
		* `corpus`: String - is the string you want to summarize
		*/
		'corpus': anotherBigString,
		```html
		<script src="/lib/underscore.js"></script>
		<script src="/lib/underscore.string.js"></script>
		<script src="/lib/porter-stemmer.js"></script>
		<script src="/sum.js"></script>
		```

		/**
		* `nSentences`: Number - controls the number of sentences from the original text included in the abstract
		*/
		'nSentences': 3,
		## Quick Start

		/**
		* `nWords`: Number - controls the length in words of the nGram output. Output might be larger as some words are ignored in the algorithm but present in the abstract, for ex. prepositions. When `nWords` is set, `nSentences` is ignored
		*/
		'nWords': 5,

		/**
		* `exclude`: Array[String] - sum.js allows you to exclude from the final abstract, sentences or nGrams that contain any of the words in the `exclude` param
		*/
		'exclude': ['polar', 'bear'],
		```javascript
		var sum = require( 'sum' );
		var bigString = "....";
		var abstract = sum({ 'corpus': bigString });
		// `abstract` is an object w/ format `{"summary":String, "sentences":Array<String>}`
		// where summary is the concatenation of the array of sentences.
		```

		/**
		* `emphasise`: Array[String] - forces sum.js to include in the summary the sentences or nGrams that contain any of the words specified by the `emphasise` param.
		*/
		'emphasise': ['magic']
		});
		## Further Options

		```javascript
		var sum = require( 'sum' );
		var anotherBigString = "...";
		var abstract = sum({
		/**
		* `corpus`: String - is the string you want to summarize
		*/
		'corpus': anotherBigString,

		Running tests
		=============
		Run /tests/browser/specrunner.html in your favourite browser.
		/**
		* `nSentences`: Number - controls the number of sentences from the original text included in the abstract
		*/
		'nSentences': 3,

		To run node tests, make sure you have [vows.js](http://vowsjs.org) installed then run
		/**
		* `nWords`: Number - controls the length in words of the nGram output. Output might be larger as some words are ignored in the algorithm but present in the abstract, for ex. prepositions. When `nWords` is set, `nSentences` is ignored
		*/
		'nWords': 5,

		vows ./tests/node/sum.js
		/**
		* `exclude`: Array[String] - sum.js allows you to exclude from the final abstract, sentences or nGrams that contain any of the words in the `exclude` param
		*/
		'exclude': ['polar', 'bear'],

		/**
		* `emphasise`: Array[String] - forces sum.js to include in the summary the sentences or nGrams that contain any of the words specified by the `emphasise` param.
		*/
		'emphasise': ['magic']
		});

		Goals
		=====
		//`abstract` is an object with format {'sentences':Array<String>, 'summary':String} where summary is just the concatenation of the sentences, for convenience.
		console.log("The short version of corpus is ", abstract.summary);
		```

		This library is intended to be fully `embeddable`. Its purpose is to be used primarily on the `client-side`.

		## Running tests
		Run `/tests/browser/specrunner.html` in your favourite browser.

		To run node tests, run `npm run test`.


		## Goals

		This library is intended to be fully `embeddable`. Its purpose is to be used primarily on the `client-side`.
		It should be `self-contained` so no API calls to external services.
		@@ -85,5 +101,4 @@ It should be as `light` as possible, both in terms of code size and dependencies and above all it must be `fast`.


		TODO
		====

		## TODO
		1. add tests to verify the correctness of the actual output
		@@ -97,8 +112,7 @@ 2. currenty the output does not preserve the ending chars of the original sentences

		Licence
		=======
		## Licence

		(The MIT License)

		Copyright (c) 2009-2011 Alex Topliceanu <alext@vibetrace.com>
		Copyright (c) Alex Topliceanu <alexandru.topliceanu@gmail.com>

		@@ -105,0 +119,0 @@ Permission is hereby granted, free of charge, to any person obtaining

493

sum.js

		(function (_undef) {
		"use strict";
		"use strict";

		var wrapper = function (_, stemmer) {
		//default values
		var defaults = {
		nSentences: 1,
		exclude: [],
		emphasise: []
		};

		// regexes
		var sentenceDelimiter = /[.!?;]/;
		var nGramDelimiter = /[.,!?;]/;
		var wordDelimiter = /\s/mg;
		var matchJunk = /["#$%&'()*+,\-\/:<=>@\[\\\]\^_`{\|}]/mg ;
		/**
		* Function wraps the library code to allow passing in the
		* dependencies easily.
		* @param {Object} _ - Reference to underscore.js
		* @param {Object} stemmer - Porter stemmer implementation in js.
		* @return {Function} Sumarization function.
		*/
		var wrapper = function (_, stemmer) {

		var stopWords = ["", "a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount", "an", "and", "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as", "at", "back","be","became", "because","become","becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom","but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own","part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", 
"some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thickv", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "the"];
		// Params default values.
		var defaults = {
		nSentences: 1,
		exclude: [],
		emphasise: []
		};

		/**
		 * Normalises a raw string before it is split into words.
		 * Runs the underscore.string mixins `unescapeHTML`, `stripTags` and
		 * `clean` on the input, then drops every character matched by
		 * `matchJunk` and lower-cases what is left.
		 * @param {String} str - Text to normalise.
		 * @return {String} The normalised, lower-cased text.
		 */
		var clean = function (str) {
		    var tidied = _(str).chain().unescapeHTML().stripTags().clean().value();
		    var withoutJunk = tidied.replace( matchJunk, '' );
		    return withoutJunk.toLowerCase();
		};
		// regexes
		var sentenceDelimiter = /[.!?;]/;
		var nGramDelimiter = /[.,!?;]/;
		var wordDelimiter = /\s/mg;
		var matchJunk = /["#$%&'()*+,\-\/:<=>@\[\\\]\^_`{\|}]/mg ;

		/**
		 * Sentence module. Wraps one raw sentence together with the frequency
		 * table of its significant words (stop words removed, the rest stemmed).
		 * @param {String} s - Raw sentence text.
		 * @return {Object} `{orig: String, words: Object, remove: Function}` where
		 * `orig` is the untouched input, `words` maps each stem to the number of
		 * times it occurs and `remove(w)` deletes a stem from that map.
		 */
		var Sentence = function (s) {
		    var hasOwn = Object.prototype.hasOwnProperty;
		    var c = clean( s );
		    var all = _.words( c, wordDelimiter );
		    var words = _(all).chain()
		        // remove stop words
		        .filter( function (w) {
		            return (stopWords.indexOf( w ) === -1);
		        })
		        // apply stemmer
		        .map( function (w) {
		            return stemmer( w );
		        })
		        // Collect word frequencies. The own-property check keeps a stem
		        // such as "constructor" from being mistaken for the member
		        // inherited from Object.prototype, which would turn the count
		        // into a string.
		        .reduce( function (collect, w) {
		            collect[w] = hasOwn.call( collect, w ) ? collect[w] + 1 : 1;
		            return collect;
		        }, {}).value();
		    // remove a word from this sentence to reduce redundancy in results
		    var remove = function (w) {
		        return delete words[w];
		    };
		    return {
		        orig: s,
		        words: words,
		        remove: remove
		    };
		};
		// List of words which are ignored when computing top relevant sentences.
		var stopWords = ["", "a", "about", "above", "above", "across", "after",
		"afterwards", "again", "against", "all", "almost", "alone", "along",
		"already", "also","although","always","am","among", "amongst",
		"amoungst", "amount", "an", "and", "another", "any","anyhow",
		"anyone","anything","anyway", "anywhere", "are", "around", "as",
		"at", "back","be","became", "because","become","becomes",
		"becoming", "been", "before", "beforehand", "behind", "being",
		"below", "beside", "besides", "between", "beyond", "bill", "both",
		"bottom","but", "by", "call", "can", "cannot", "cant", "co", "con",
		"could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
		"down", "due", "during", "each", "eg", "eight", "either", "eleven",
		"else", "elsewhere", "empty", "enough", "etc", "even", "ever",
		"every", "everyone", "everything", "everywhere", "except", "few",
		"fifteen", "fify", "fill", "find", "fire", "first", "five", "for",
		"former", "formerly", "forty", "found", "four", "from", "front",
		"full", "further", "get", "give", "go", "had", "has", "hasnt",
		"have", "he", "hence", "her", "here", "hereafter", "hereby",
		"herein", "hereupon", "hers", "herself", "him", "himself", "his",
		"how", "however", "hundred", "ie", "if", "in", "inc", "indeed",
		"interest", "into", "is", "it", "its", "itself", "keep", "last",
		"latter", "latterly", "least", "less", "ltd", "made", "many", "may",
		"me", "meanwhile", "might", "mill", "mine", "more", "moreover",
		"most", "mostly", "move", "much", "must", "my", "myself", "name",
		"namely", "neither", "never", "nevertheless", "next", "nine", "no",
		"nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere",
		"of", "off", "often", "on", "once", "one", "only", "onto", "or",
		"other", "others", "otherwise", "our", "ours", "ourselves", "out",
		"over", "own","part", "per", "perhaps", "please", "put", "rather",
		"re", "same", "see", "seem", "seemed", "seeming", "seems",
		"serious", "several", "she", "should", "show", "side", "since",
		"sincere", "six", "sixty", "so", "some", "somehow", "someone",
		"something", "sometime", "sometimes", "somewhere", "still", "such",
		"system", "take", "ten", "than", "that", "the", "their", "them",
		"themselves", "then", "thence", "there", "thereafter", "thereby",
		"therefore", "therein", "thereupon", "these", "they", "thickv",
		"thin", "third", "this", "those", "though", "three", "through",
		"throughout", "thru", "thus", "to", "together", "too", "top",
		"toward", "towards", "twelve", "twenty", "two", "un", "under",
		"until", "up", "upon", "us", "very", "via", "was", "we", "well",
		"were", "what", "whatever", "when", "whence", "whenever", "where",
		"whereafter", "whereas", "whereby", "wherein", "whereupon",
		"wherever", "whether", "which", "while", "whither", "who",
		"whoever", "whole", "whom", "whose", "why", "will", "with",
		"within", "without", "would", "yet", "you", "your", "yours",
		"yourself", "yourselves", "the"];

		var sum = function (opts){
		/**
		 * Prepares a sentence for tokenisation: applies the underscore.string
		 * mixins `unescapeHTML`, `stripTags` and `clean`, deletes the characters
		 * matched by `matchJunk`, and converts the result to lower case.
		 * @param {String} str - Text to prepare.
		 * @return {String} The prepared, lower-cased text.
		 */
		var clean = function (str) {
		    var tidied = _(str).chain().unescapeHTML().stripTags().clean().value();
		    var withoutJunk = tidied.replace( matchJunk, '' );
		    return withoutJunk.toLowerCase();
		};

		// handle options
		opts = _.extend( {}, defaults, opts );
		opts.corpus = opts.corpus \|\| _undef;
		if (opts.corpus === _undef) throw Error( 'No input corpus' );
		if (opts.nWords !== _undef && !_.isNumber(opts.nWords)) throw Error('Bad value for nWords');
		/**
		 * Sentence Module. Creates object with format:
		 * {orig:String, words:Object, remove:Function}
		 * `orig` is the untouched input, `words` maps every significant stem
		 * (stop words removed, the rest stemmed) to its number of occurrences,
		 * and `remove(w)` deletes a stem from that map.
		 * @param {String} s - Raw sentence text.
		 * @return {Object}
		 */
		var Sentence = function (s) {
		    var hasOwn = Object.prototype.hasOwnProperty;
		    var c = clean( s );
		    var all = _.words( c, wordDelimiter );
		    var words = _(all).chain()
		        // remove stop words
		        .filter( function (w) {
		            return (stopWords.indexOf( w ) === -1);
		        })
		        // apply stemmer
		        .map( function (w) {
		            return stemmer( w );
		        })
		        // Collect word frequencies. The own-property check keeps a stem
		        // such as "constructor" from being mistaken for the member
		        // inherited from Object.prototype, which would turn the count
		        // into a string.
		        .reduce( function (collect, w) {
		            collect[w] = hasOwn.call( collect, w ) ? collect[w] + 1 : 1;
		            return collect;
		        }, {}).value();
		    // remove a word from this sentence to reduce redundancy in results
		    var remove = function (w) {
		        return delete words[w];
		    };
		    return {
		        orig: s,
		        words: words,
		        remove: remove
		    };
		};

		// clean corpus
		var s = opts.corpus.split( sentenceDelimiter ); // TODO: keep the sentence ending chars
		var sentences = _(s).map( function (s) {
		return new Sentence(s);
		});
		/**
		* Text summarization function.
		* @param {Object} opts
		* @param {String} opts.corpus - String to summarize.
		* @param {String} opts.nWords - Number of words the summary should have.
		* @param {String} opts.nSentences - Number of sentences the summary should have.
		* @return {Object} output
		* @return {Array<String>} output.sentences - The summary sentences in
		* order of relevance to the input text.
		* @return {String} output.summary - the concatenation of the summary
		* sentences for convenience.
		*/
		var sum = function (opts){
		// Handle options.
		opts = _.extend( {}, defaults, opts );
		opts.corpus = opts.corpus \|\| _undef;
		if (opts.corpus === _undef) {
		throw Error( 'No input corpus' );
		}
		if (opts.nWords !== _undef && !_.isNumber(opts.nWords)) {
		throw Error('Bad value for nWords');
		}

		// Clean corpus.
		var s = opts.corpus.split(sentenceDelimiter);
		var sentences = _(s).map( function (s) {
		return new Sentence(s);
		});

		// splits the sentences into nGrams then applies the same algorithm
		if (opts.nWords) {
		// Splits the sentences into nGrams then applies the same algorithm.
		if (opts.nWords) {

		// `opts.nSentences` is ignored, output size is determined by lexem size
		opts.nSentences = 1;
		// `opts.nSentences` is ignored, output size is determined by lexem size.
		opts.nSentences = 1;

		// Build the n-gram candidates: each sentence is cut into overlapping
		// windows of `opts.nWords` tokens, every window wrapped in a Sentence.
		var nGrams = _(sentences).reduce( function (collect, s) {
		    var orig = s.orig;
		    // Peel one space-delimited token off the original text for each
		    // entry in `s.words`; empty tokens are skipped.
		    var partials = _(s.words).reduce( function (memo) {
		        var pos = orig.indexOf(' ');
		        if (pos === -1) {
		            pos = orig.length;
		        }
		        var partial = orig.substr(0, pos);
		        orig = orig.substr(pos + 1);
		        if (partial !== '') {
		            memo.push(partial);
		        }
		        return memo;
		    }, []);
		    // Not enough tokens to slide a window over: keep them as one n-gram.
		    if (partials.length <= opts.nWords) {
		        collect.push( new Sentence( partials.join(' ') ) );
		        return collect;
		    }
		    // `<=` so that the last window, the one ending on the final token,
		    // is produced as well.
		    var lastStart = partials.length - opts.nWords;
		    for (var i = 0; i <= lastStart; i ++) {
		        var gram = [];
		        for (var j = i; j < i + opts.nWords; j ++) {
		            gram.push( partials[j] );
		        }
		        // Joined like the short-sentence case above, so no trailing space.
		        collect.push( new Sentence( gram.join(' ') ) );
		    }
		    return collect;
		}, []);
		sentences = nGrams;
		}
		// Build the n-gram candidates: each sentence is cut into overlapping
		// windows of `opts.nWords` tokens, every window wrapped in a Sentence.
		var nGrams = _(sentences).reduce( function (collect, s) {
		    var orig = s.orig;
		    // Peel one space-delimited token off the original text for each
		    // entry in `s.words`; empty tokens are skipped.
		    var partials = _(s.words).reduce( function (memo) {
		        var pos = orig.indexOf(' ');
		        if (pos === -1) {
		            pos = orig.length;
		        }
		        var partial = orig.substr(0, pos);
		        orig = orig.substr(pos + 1);
		        if (partial !== '') {
		            memo.push(partial);
		        }
		        return memo;
		    }, []);
		    // Not enough tokens to slide a window over: keep them as one n-gram.
		    if (partials.length <= opts.nWords) {
		        collect.push( new Sentence( partials.join(' ') ) );
		        return collect;
		    }
		    // `<=` so that the last window, the one ending on the final token,
		    // is produced as well.
		    var lastStart = partials.length - opts.nWords;
		    for (var i = 0; i <= lastStart; i ++) {
		        var gram = [];
		        for (var j = i; j < i + opts.nWords; j ++) {
		            gram.push( partials[j] );
		        }
		        // Joined like the short-sentence case above, so no trailing space.
		        collect.push( new Sentence( gram.join(' ') ) );
		    }
		    return collect;
		}, []);
		sentences = nGrams;
		}


		// Return all sentences that contain a given word, i.e. whose `words`
		// map has the word as an own key. The own-property test keeps words such
		// as "constructor" from matching every sentence through Object.prototype.
		var containing = function (w) {
		    return _(sentences).filter( function (s) {
		        return Object.prototype.hasOwnProperty.call( s.words, w );
		    });
		};

		// if summary must exclude words in opts.exclude remove sentences that contain those words
		if ( _.isArray(opts.exclude) && opts.exclude.length !== 0) {
		var excludes = _(opts.exclude).map( function (w) {
		return stemmer(clean(w));
		});
		sentences = _(sentences).filter( function (s) {
		var words = _(s.words).keys();
		return (_.intersection( words, excludes ).length === 0);
		});
		}
		/**
		 * Return all sentences that contain a given word, i.e. whose `words`
		 * map has the word as an own key. The own-property test keeps words
		 * such as "constructor" from matching every sentence through
		 * Object.prototype.
		 * @param {String} w - word
		 * @return {Array<Object>}
		 */
		var containing = function (w) {
		    return _(sentences).filter( function (s) {
		        return Object.prototype.hasOwnProperty.call( s.words, w );
		    });
		};


		var summary = [] ;
		var counter = 0;
		// If summary must exclude words in opts.exclude remove sentences
		// that contain those words.
		if ( _.isArray(opts.exclude) && opts.exclude.length !== 0) {
		var excludes = _(opts.exclude).map( function (w) {
		return stemmer(clean(w));
		});
		sentences = _(sentences).filter( function (s) {
		var words = _(s.words).keys();
		return (_.intersection( words, excludes ).length === 0);
		});
		}

		// extract sentences in order of their relevance
		while (true) {
		var N = sentences.length;
		var summary = [];
		var counter = 0;

		// builds a hash of all words with global frequencies
		var words = _(sentences).reduce( function (collect,s) {
		_(s.words).each( function (count, w) {
		collect[w] = collect[w] ? collect[w] + count : count ;
		});
		return collect;
		}, {});

		// if summary must have the words in opts.emphasise
		var emphasise = [];
		if ( _.isArray(opts.emphasise) && opts.emphasise.length !== 0) {
		emphasise = _(opts.emphasise).map( function (w) {
		return stemmer(clean(w));
		});
		}
		// Extract sentences in order of their relevance.
		while (true) {
		var N = sentences.length;

		//calculate relevance for each sentence
		_(sentences).each( function (s) {
		var relevance = _(s.words).reduce( function (memo, freq, w) {
		var local = Math.log( 1 + freq );
		var global = Math.log( N / containing(w).length );
		return memo = memo + (local * global);
		}, 0);

		// if current sentence containes emphasised words, bumb up the relevance
		var bump = _.intersection(emphasise, _(s.words).keys()).length;
		relevance += bump * 1000; //big enough to push it in front
		// Builds a hash of all words with global frequencies.
		var words = _(sentences).reduce( function (collect,s) {
		_(s.words).each( function (count, w) {
		collect[w] = collect[w] ? collect[w] + count : count ;
		});
		return collect;
		}, {});

		s.relevance = relevance;
		})
		// If summary must have the words in opts.emphasise.
		var emphasise = [];
		if ( _.isArray(opts.emphasise) && opts.emphasise.length !== 0) {
		emphasise = _(opts.emphasise).map( function (w) {
		return stemmer(clean(w));
		});
		}

		// highest relevance sentence
		var highest = _(sentences).max( function (s) {
		return s.relevance;
		});
		// Calculate relevance for each sentence.
		_(sentences).each( function (s) {
		var relevance = _(s.words).reduce( function (memo, freq, w) {
		var local = Math.log(1 + freq);
		var global = Math.log(N / containing(w).length);
		return memo = memo + (local * global);
		}, 0);

		// Remove the words of the selected sentence from the remaining sentences
		// to reduce redundancy. `highest.words` is a {word: count} map, so the
		// iteratee receives (count, word): the word is the SECOND argument.
		sentences = _(sentences).chain()
		    .without(highest)
		    .map( function (s) {
		        _(highest.words).each( function (count, w) {
		            s.remove( w );
		        });
		        return s;
		    })
		    .value();
		// If current sentence containes emphasised words,
		// bumb up the relevance.
		var bump = _.intersection(emphasise, _(s.words).keys()).length;
		relevance += bump * 1000;

		summary.push( highest.orig ) ;
		counter += 1;
		s.relevance = relevance;
		})

		var stop = (counter === opts.nSentences \|\| sentences.length === 0);
		if (stop) break;
		}//~ end while
		return {
		'summary': summary.join('.'),
		'sentences': summary
		};
		};
		return sum;
		};

		// Highest relevance sentence.
		var highest = _(sentences).max( function (s) {
		return s.relevance;
		});

		// exports the `sum` function in node.js
		if (typeof exports !== 'undefined' && typeof module !== 'undefined' && module.exports && typeof require !== 'undefined') {
		    var stemmer = require( 'porter-stemmer' ).stemmer;
		    var _ = require( 'underscore' );
		    _.str = require( 'underscore.string' );
		    _.mixin( _.str.exports() );
		    module.exports = wrapper(_, stemmer);
		}
		// exports `sum` to AMD module, defining dependencies
		else if (typeof define === 'function' && define.amd) {
		    define('sum', [
		        'underscore',
		        'underscore.string',
		        'porter-stemmer'
		    ], function(_, str, stemmer) {
		        // Mix underscore.string into `_` as the other branches do; the
		        // library calls its helpers as Underscore methods.
		        _.str = str;
		        _.mixin( str.exports() );
		        return wrapper(_, stemmer);
		    });
		}
		// export in browser; all three globals must be present
		else if (typeof this !== 'undefined' && this._ && this._.str && this.stemmer) {
		    this._.mixin( this._.str.exports() );
		    this.sum = wrapper(this._, this.stemmer);
		}
		else {
		    throw new Error( 'unsupported js environment detected' );
		}
		// Remove the words of the selected sentence from the remaining sentences
		// to reduce redundancy. `highest.words` is a {word: count} map, so the
		// iteratee receives (count, word): the word is the SECOND argument.
		sentences = _(sentences).chain()
		    .without(highest)
		    .map( function (s) {
		        _(highest.words).each( function (count, w) {
		            s.remove( w );
		        });
		        return s;
		    })
		    .value();

		summary.push( highest.orig ) ;
		counter += 1;

		var stop = (counter === opts.nSentences \|\| sentences.length === 0);
		if (stop) break;
		}

		return {
		'summary': summary.join('.'),
		'sentences': summary
		};
		};

		return sum;
		};


		// exports the `sum` function in node.js
		if (typeof exports !== 'undefined'
		    && typeof module !== 'undefined'
		    && module.exports
		    && typeof require !== 'undefined') {
		    var stemmer = require( 'porter-stemmer' ).stemmer;
		    var _ = require( 'underscore' );
		    _.str = require( 'underscore.string' );
		    _.mixin( _.str.exports() );
		    module.exports = wrapper(_, stemmer);
		}
		// exports `sum` to AMD module, defining dependencies
		else if (typeof define === 'function' && define.amd) {
		    define('sum', [
		        'underscore',
		        'underscore.string',
		        'porter-stemmer'
		    ], function(_, str, stemmer) {
		        // Mix underscore.string into `_` as the other branches do; the
		        // library calls its helpers as Underscore methods.
		        _.str = str;
		        _.mixin( str.exports() );
		        return wrapper(_, stemmer);
		    });
		}
		// export in browser; all three globals must be present
		else if (typeof this !== 'undefined'
		    && this._ && this._.str && this.stemmer) {
		    this._.mixin( this._.str.exports() );
		    this.sum = wrapper(this._, this.stemmer);
		}
		else {
		    throw new Error( 'Unsupported js environment detected' );
		}

		}).call(this);

tests/browser/specs/SpecSum.js

		describe( 'test sum\' params', function () {
		it( 'should return one sentence', function () {
		var corpus = corpora[1];
		var actual = sum({ 'corpus': corpus.text, 'nSentences': 1 });
		var expected = 1;
		expect(actual.sentences.length).toEqual( expected );
		});
		it( 'should return two sentences', function () {
		var corpus = corpora[1];
		var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
		var expected = 3;
		expect(actual.sentences.length).toEqual( expected );
		});
		it( 'should ignore sentences that have the word `bladder` in them', function () {
		var corpus = corpora[1];
		var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'exclude': ['bladder', 'Chubb'] });
		var sum2 = sum({ 'corpus': corpus.text, 'nSentences': 1 });
		expect( sum1.summary ).not.toEqual( sum2.summary );
		});
		it( 'should have the emphasisted word `drug` in the abstract', function () {
		var corpus = corpora[1];
		var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'emphasise': ['Drug'] });
		var actual = _.str.include( sum1.summary, 'Drug' );
		expect( actual ).toBe( true );
		});
		it( 'should return one sentence', function () {
		var corpus = corpora[1];
		var actual = sum({ 'corpus': corpus.text, 'nSentences': 1 });
		var expected = 1;
		expect(actual.sentences.length).toEqual( expected );
		});
		it( 'should return two sentences', function () {
		var corpus = corpora[1];
		var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
		var expected = 3;
		expect(actual.sentences.length).toEqual( expected );
		});
		it( 'should ignore sentences that have the word `bladder` in them', function () {
		var corpus = corpora[1];
		var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'exclude': ['bladder', 'Chubb'] });
		var sum2 = sum({ 'corpus': corpus.text, 'nSentences': 1 });
		expect( sum1.summary ).not.toEqual( sum2.summary );
		});
		it( 'should have the emphasisted word `drug` in the abstract', function () {
		var corpus = corpora[1];
		var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'emphasise': ['Drug'] });
		var actual = _.str.include( sum1.summary, 'Drug' );
		expect( actual ).toBe( true );
		});
		});
		describe( 'summarize.js basic output test', function () {
		corpora.forEach( function (corpus) {
		it( 'should calculate the summary', function () {
		var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
		var expected = 3;
		expect(actual.sentences.length).toEqual( expected );
		});
		});
		corpora.forEach( function (corpus) {
		it( 'should calculate the summary', function () {
		var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
		var expected = 3;
		expect(actual.sentences.length).toEqual( expected );
		});
		});
		});
		describe( 'test nWords params in action', function () {
		corpora.forEach( function (corpus) {
		it( 'should calculate the summary', function () {
		var actual = sum({ 'corpus': corpus.text, 'nWords': 5 });
		expect(actual.sentences.length).not.toEqual('');
		});
		});
		corpora.forEach( function (corpus) {
		it( 'should calculate the summary', function () {
		var actual = sum({ 'corpus': corpus.text, 'nWords': 5 });
		expect(actual.sentences.length).not.toEqual('');
		});
		});
		});
		//TODO add tests to validate correctness of the actual output

tests/node/sum.js

		@@ -10,31 +10,31 @@ var vows = require( 'vows' );
		.addBatch({
		'when summarizing a text': {
		topic: function () {
		var s = sum({
		'corpus': corpus,
		'nSentences': 3
		});
		return s.summary;
		},
		'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
		assert.ifError( error );
		assert.isString( summary );
		}
		}
		'when summarizing a text': {
		topic: function () {
		var s = sum({
		'corpus': corpus,
		'nSentences': 3
		});
		return s.summary;
		},
		'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
		assert.ifError( error );
		assert.isString( summary );
		}
		}
		})
		.addBatch({
		'when summarizing a text': {
		topic: function () {
		var s = sum({
		'corpus': corpus,
		'nWords': 5
		});
		return s.summary;
		},
		'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
		assert.ifError( error );
		assert.isString( summary );
		}
		}
		'when summarizing a text': {
		topic: function () {
		var s = sum({
		'corpus': corpus,
		'nWords': 5
		});
		return s.summary;
		},
		'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
		assert.ifError( error );
		assert.isString( summary );
		}
		}
		})
		.export(module);

.npmignore

Sorry, the diff of this file is not supported yet

LICENSE

Sorry, the diff of this file is not supported yet

tests/browser/specrunner.html

Sorry, the diff of this file is not supported yet

sum - npm Package Compare versions

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes