🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team.Learn more
Socket
Book a DemoInstallSign in
Socket

sum

Package Overview
Dependencies
Maintainers
1
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sum - npm Package Compare versions

Comparing version

to
0.1.2

.travis.yml

57

package.json
{
"author": "alex.topliceanu <alext@vibetrace.com> (https://github.com/topliceanu)",
"name": "sum",
"description": "text summarization utility",
"version": "0.1.1",
"homepage": "https://github.com/topliceanu/text-summarization",
"repository": {
"type": "git",
"url": "git@github.com:topliceanu/text-summarization.git"
},
"main": "./sum.js",
"engines": {
"node": ">0.4.12"
},
"dependencies": {
"underscore": "~1.3.1",
"underscore.string": "~2.0.0",
"porter-stemmer": "~0.9.1",
"vows": "0.6.1"
}
"name": "sum",
"version": "0.1.2",
"description": "text summarization utility",
"homepage": "https://github.com/topliceanu/text-summarization",
"license": "MIT",
"keywords": [
"summarization",
"nlp",
"stemmer",
"stop-words",
"express"
],
"author": "alex.topliceanu <alexandru.topliceanu@gmail.com> (https://github.com/topliceanu)",
"repository": {
"type": "git",
"url": "git://github.com:topliceanu/text-summarization.git"
},
"bugs": {
"url": "https://github.com/topliceanu/text-summarization/issues"
},
"main": "./sum.js",
"scripts": {
"test": "./node_modules/.bin/vows --spec --isolate ./tests/node/sum.js",
"lint": "./node_modules/.bin/jshint sum.js"
},
"dependencies": {
"underscore": "1.7.0",
"underscore.string": "3.0.3",
"porter-stemmer": "0.9.1"
},
"devDependencies": {
"vows": "0.8.1",
"jshint": "2.6.0"
},
"optionalDependencies": {},
"engines": {
"node": ">0.10.0"
}
}

@@ -1,80 +0,96 @@

_____ _
/ ____| (_)
| (___ _ _ _ __ ___ _ ___
_____ _
/ ____| (_)
| (___ _ _ _ __ ___ _ ___
\___ \ | | | || '_ ` _ \ | |/ __|
____) || |_| || | | | | | _ | |\__ \
|_____/ \__,_||_| |_| |_|(_)| ||___/
_/ |
|__/
_/ |
|__/
Sum.js
============
## Sum.js
[![NPM](https://nodei.co/npm/sum.png?downloads=true&stars=true)](https://nodei.co/npm/sum/)
[![NPM](https://nodei.co/npm-dl/sum.png?months=12)](https://nodei.co/npm-dl/sum/)
| Indicator | |
|:-----------------------|:-------------------------------------------------------------------------|
| continuous integration | [![Build Status](https://travis-ci.org/topliceanu/sum.svg?branch=master)](https://travis-ci.org/topliceanu/sum) |
| dependency management | [![Dependency Status](https://david-dm.org/topliceanu/sum.svg?style=flat)](https://david-dm.org/topliceanu/sum) [![devDependency Status](https://david-dm.org/topliceanu/sum/dev-status.svg?style=flat)](https://david-dm.org/topliceanu/sum#info=devDependencies) |
| change log | [CHANGELOG](https://github.com/topliceanu/sum/blob/master/CHANGELOG.md) [Releases](https://github.com/topliceanu/sum/releases) |
A simple function for summarizing text e.g. for automatically determining the sentences that are most relevant to the context of the corpus.
This library depends on the [underscore](http://documentcloud.github.com/underscore/), [underscore.string](http://epeli.github.com/underscore.string/) and [porter-stemmer](https://github.com/jedp/porter-stemmer) for the moment
This library depends on the [underscore](http://documentcloud.github.com/underscore/), [underscore.string](http://epeli.github.com/underscore.string/) and [porter-stemmer](https://github.com/jedp/porter-stemmer).
Install in node.js
==================
sudo npm install -g sum
## Install in node.js
Install in browser
==================
<script src="/lib/underscore.js"></script>
<script src="/lib/underscore.string.js"></script>
<script src="/lib/porter-stemmer.js"></script>
<script src="/sum.browser.js"></script>
```bash
sudo npm install -g sum
```
Quick Start
===========
var sum = require( 'sum' );
var bigString = "....";
var abstract = sum({ 'corpus': bigString });
## Install in browser
Further Options
===============
var sum = require( 'sum' );
var anotherBigString = "...";
var abstract = sum({
/**
* `corpus`: String - is the string you want to summarize
*/
'corpus': anotherBigString,
```html
<script src="/lib/underscore.js"></script>
<script src="/lib/underscore.string.js"></script>
<script src="/lib/porter-stemmer.js"></script>
<script src="/sum.js"></script>
```
/**
* `nSentences`: Number - controls the number of sentences from the original text included in the abstract
*/
'nSentences': 3,
## Quick Start
/**
* `nWords`: Number - controls the length in words of the nGram output. Output might be larger as some words are ignored in the algorithm but present in the abstract, for ex. prepositions. When `nWords` is set, `nSentences` is ignored
*/
'nWords': 5,
/**
* `exclude`: Array[String] - sum.js allows you to exclude from the final abstract, sentences or nGrams that contain any of the words in the `exclude` param
*/
'exclude': ['polar', 'bear'],
```javascript
var sum = require( 'sum' );
var bigString = "....";
var abstract = sum({ 'corpus': bigString });
// `abstract` is an object w/ format `{"summary":String, "sentences":Array<String>}`
// where summary is the concatenation of the array of sentences.
```
/**
* `emphasise`: Array[String] - forces sum.js to include in the summary the sentences or nGrams that contain any of the words specified by the `emphasise` param.
*/
'emphasise': ['magic']
});
## Further Options
```javascript
var sum = require( 'sum' );
var anotherBigString = "...";
var abstract = sum({
/**
* `corpus`: String - is the string you want to summarize
*/
'corpus': anotherBigString,
Running tests
=============
Run /tests/browser/specrunner.html in your favourite browser.
/**
* `nSentences`: Number - controls the number of sentences from the original text included in the abstract
*/
'nSentences': 3,
To run node tests, make sure you have [vows.js](http://vowsjs.org) installed then run
/**
* `nWords`: Number - controls the length in words of the nGram output. Output might be larger as some words are ignored in the algorithm but present in the abstract, for ex. prepositions. When `nWords` is set, `nSentences` is ignored
*/
'nWords': 5,
vows ./tests/node/sum.js
/**
* `exclude`: Array[String] - sum.js allows you to exclude from the final abstract, sentences or nGrams that contain any of the words in the `exclude` param
*/
'exclude': ['polar', 'bear'],
/**
* `emphasise`: Array[String] - forces sum.js to include in the summary the sentences or nGrams that contain any of the words specified by the `emphasise` param.
*/
'emphasise': ['magic']
});
Goals
=====
//`abstract` is an object with format {'sentences':Array<String>, 'summary':String} where summary is just the concatenation of the sentences, for convenience.
console.log("The short version of corpus is ", abstract.summary);
```
This library is intended to be fully `embeddable`. Its purpose is to be used primarily on the `client-side`.
## Running tests
Run `/tests/browser/specrunner.html` in your favourite browser.
To run node tests, run `npm run test`.
## Goals
This library is intended to be fully `embeddable`. Its purpose is to be used primarily on the `client-side`.
It should be `self-contained` so no API calls to external services.

@@ -85,5 +101,4 @@ It should be as `light` as possible, both in terms of code size and dependencies and above all it must be `fast`.

TODO
====
## TODO
1. add tests to verify the correctness of the actual output

@@ -97,8 +112,7 @@ 2. currently the output does not preserve the ending chars of the original sentences

Licence
=======
## Licence
(The MIT License)
Copyright (c) 2009-2011 Alex Topliceanu <alext@vibetrace.com>
Copyright (c) Alex Topliceanu <alexandru.topliceanu@gmail.com>

@@ -105,0 +119,0 @@ Permission is hereby granted, free of charge, to any person obtaining

(function (_undef) {
"use strict";
"use strict";
var wrapper = function (_, stemmer) {
//default values
var defaults = {
nSentences: 1,
exclude: [],
emphasise: []
};
// regexes
var sentenceDelimiter = /[.!?;]/;
var nGramDelimiter = /[.,!?;]/;
var wordDelimiter = /\s/mg;
var matchJunk = /["#$%&'()*+,\-\/:<=>@\[\\\]\^_`{|}]/mg ;
/**
* Function wraps the library code to allow passing in the
* dependencies easily.
* @param {Object} _ - Reference to underscore.js
* @param {Object} stemmer - Porter stemmer implementation in js.
* @return {Function} Summarization function.
*/
var wrapper = function (_, stemmer) {
var stopWords = ["", "a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount", "an", "and", "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as", "at", "back","be","became", "because","become","becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom","but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own","part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", 
"some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thickv", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "the"];
// Params default values.
var defaults = {
nSentences: 1,
exclude: [],
emphasise: []
};
// Normalises raw sentence text before it is split into words: the
// underscore.string chain (unescapeHTML, stripTags, clean) runs first, then
// every character matched by `matchJunk` is dropped and the text is lower-cased.
var clean = function (str) {
    var plainText = _(str).chain()
        .unescapeHTML()
        .stripTags()
        .clean()
        .value();
    var withoutJunk = plainText.replace( matchJunk, '' );
    return withoutJunk.toLowerCase();
};
// regexes
var sentenceDelimiter = /[.!?;]/;
var nGramDelimiter = /[.,!?;]/;
var wordDelimiter = /\s/mg;
var matchJunk = /["#$%&'()*+,\-\/:<=>@\[\\\]\^_`{|}]/mg ;
// Sentence Module. Builds a record for one sentence with the shape
// {orig: String, words: Object, remove: Function} where:
//   orig   - the sentence exactly as it was passed in,
//   words  - map from each stemmed, non-stop word to its number of occurrences,
//   remove - deletes one word from `words` (returns the result of `delete`).
var Sentence = function (s) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var c = clean( s );
    var all = _.words( c, wordDelimiter );
    var words = _(all).chain()
        // remove stop words
        .filter( function (w) {
            return (stopWords.indexOf( w ) === -1) ;
        })
        // apply stemmer
        .map( function (w) {
            return stemmer( w );
        })
        // collect word frequencies
        .reduce( function (collect, w) {
            // Own-property test: a truthiness check would pick up inherited
            // members such as `constructor` and corrupt the count.
            collect[w] = hasOwn.call( collect, w ) ? collect[w] + 1 : 1 ;
            return collect;
        }, {}).value();

    // remove a word from this sentence to reduce redundancy in results
    var remove = function (w) {
        return delete words[w];
    };

    return {
        orig: s,
        words: words,
        remove: remove
    };
};
// List of words which are ignored when computing top relevant sentences.
var stopWords = ["", "a", "about", "above", "above", "across", "after",
"afterwards", "again", "against", "all", "almost", "alone", "along",
"already", "also","although","always","am","among", "amongst",
"amoungst", "amount", "an", "and", "another", "any","anyhow",
"anyone","anything","anyway", "anywhere", "are", "around", "as",
"at", "back","be","became", "because","become","becomes",
"becoming", "been", "before", "beforehand", "behind", "being",
"below", "beside", "besides", "between", "beyond", "bill", "both",
"bottom","but", "by", "call", "can", "cannot", "cant", "co", "con",
"could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
"down", "due", "during", "each", "eg", "eight", "either", "eleven",
"else", "elsewhere", "empty", "enough", "etc", "even", "ever",
"every", "everyone", "everything", "everywhere", "except", "few",
"fifteen", "fify", "fill", "find", "fire", "first", "five", "for",
"former", "formerly", "forty", "found", "four", "from", "front",
"full", "further", "get", "give", "go", "had", "has", "hasnt",
"have", "he", "hence", "her", "here", "hereafter", "hereby",
"herein", "hereupon", "hers", "herself", "him", "himself", "his",
"how", "however", "hundred", "ie", "if", "in", "inc", "indeed",
"interest", "into", "is", "it", "its", "itself", "keep", "last",
"latter", "latterly", "least", "less", "ltd", "made", "many", "may",
"me", "meanwhile", "might", "mill", "mine", "more", "moreover",
"most", "mostly", "move", "much", "must", "my", "myself", "name",
"namely", "neither", "never", "nevertheless", "next", "nine", "no",
"nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere",
"of", "off", "often", "on", "once", "one", "only", "onto", "or",
"other", "others", "otherwise", "our", "ours", "ourselves", "out",
"over", "own","part", "per", "perhaps", "please", "put", "rather",
"re", "same", "see", "seem", "seemed", "seeming", "seems",
"serious", "several", "she", "should", "show", "side", "since",
"sincere", "six", "sixty", "so", "some", "somehow", "someone",
"something", "sometime", "sometimes", "somewhere", "still", "such",
"system", "take", "ten", "than", "that", "the", "their", "them",
"themselves", "then", "thence", "there", "thereafter", "thereby",
"therefore", "therein", "thereupon", "these", "they", "thickv",
"thin", "third", "this", "those", "though", "three", "through",
"throughout", "thru", "thus", "to", "together", "too", "top",
"toward", "towards", "twelve", "twenty", "two", "un", "under",
"until", "up", "upon", "us", "very", "via", "was", "we", "well",
"were", "what", "whatever", "when", "whence", "whenever", "where",
"whereafter", "whereas", "whereby", "wherein", "whereupon",
"wherever", "whether", "which", "while", "whither", "who",
"whoever", "whole", "whom", "whose", "why", "will", "with",
"within", "without", "would", "yet", "you", "your", "yours",
"yourself", "yourselves", "the"];
var sum = function (opts){
/**
 * Prepares raw sentence text for word splitting. The text goes through the
 * underscore.string chain (unescapeHTML, stripTags, clean), then every
 * character matched by `matchJunk` is removed and the result is lower-cased.
 * @param {String} str - Raw sentence text.
 * @return {String} Normalised, lower-case text.
 */
var clean = function (str) {
    var stripped = _(str).chain()
        .unescapeHTML()
        .stripTags()
        .clean()
        .value();
    var junkFree = stripped.replace( matchJunk, '' );
    return junkFree.toLowerCase();
};
// handle options
opts = _.extend( {}, defaults, opts );
opts.corpus = opts.corpus || _undef;
if (opts.corpus === _undef) throw Error( 'No input corpus' );
if (opts.nWords !== _undef && !_.isNumber(opts.nWords)) throw Error('Bad value for nWords');
/**
 * Sentence Module. Creates an object with format:
 * {orig:String, words:Object, remove:Function}
 *  - orig: the sentence exactly as it was passed in.
 *  - words: map from each stemmed, non-stop word to its number of
 *    occurrences in the sentence.
 *  - remove: deletes one word from `words`.
 * @param {String} s - Raw sentence text.
 * @return {Object}
 */
var Sentence = function (s) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var c = clean( s );
    var all = _.words( c, wordDelimiter );
    var words = _(all).chain()
        // remove stop words
        .filter( function (w) {
            return (stopWords.indexOf( w ) === -1) ;
        })
        // apply stemmer
        .map( function (w) {
            return stemmer( w );
        })
        // collect word frequencies
        .reduce( function (collect, w) {
            // Own-property test: a truthiness check would pick up inherited
            // members such as `constructor` and corrupt the count.
            collect[w] = hasOwn.call( collect, w ) ? collect[w] + 1 : 1 ;
            return collect;
        }, {}).value();

    // remove a word from this sentence to reduce redundancy in results
    var remove = function (w) {
        return delete words[w];
    };

    return {
        orig: s,
        words: words,
        remove: remove
    };
};
// clean corpus
var s = opts.corpus.split( sentenceDelimiter ); // TODO: keep the sentence ending chars
var sentences = _(s).map( function (s) {
return new Sentence(s);
});
/**
* Text summarization function.
* @param {Object} opts
* @param {String} opts.corpus - String to summarize.
* @param {Number} opts.nWords - Number of words the summary should have.
* @param {Number} opts.nSentences - Number of sentences the summary should have.
* @return {Object} output
* @return {Array<String>} output.sentences - The summary sentences in
* order of relevance to the input text.
* @return {String} output.summary - the concatenation of the summary
* sentences for convenience.
*/
var sum = function (opts){
// Handle options.
opts = _.extend( {}, defaults, opts );
opts.corpus = opts.corpus || _undef;
if (opts.corpus === _undef) {
throw Error( 'No input corpus' );
}
if (opts.nWords !== _undef && !_.isNumber(opts.nWords)) {
throw Error('Bad value for nWords');
}
// Clean corpus.
var s = opts.corpus.split(sentenceDelimiter);
var sentences = _(s).map( function (s) {
return new Sentence(s);
});
// splits the sentences into nGrams then applies the same algorithm
if (opts.nWords) {
// Splits the sentences into nGrams then applies the same algorithm.
if (opts.nWords) {
// `opts.nSentences` is ignored, output size is determined by lexem size
opts.nSentences = 1;
// `opts.nSentences` is ignored, output size is determined by lexem size.
opts.nSentences = 1;
var nGrams = _(sentences).reduce( function (collect, s) {
var orig = s.orig;
var partials = _(s.words).reduce( function (memo, freq, w) {
var pos = orig.indexOf(' ');
if (pos === -1) pos = orig.length;
var partial = orig.substr(0, pos);
orig = orig.substr(pos + 1);
if (partial !== '') memo.push(partial);
return memo;
}, []);
if (partials.length <= opts.nWords) {
var newSentence = new Sentence( partials.join(' '));
collect.push( newSentence );
return collect;
}
var i = 0, j = 0, n = partials.length - opts.nWords, m=partials.length, tmp;
for (i = 0; i < n; i ++) {
var tmp = ''
for (j = i; j < i+opts.nWords; j ++) {
tmp += partials[j] + ' ';
}
var newSentence = new Sentence( tmp );
collect.push( newSentence );
}
return collect;
}, []);
sentences = nGrams;
}
var nGrams = _(sentences).reduce( function (collect, s) {
var orig = s.orig;
var partials = _(s.words).reduce( function (memo, freq, w) {
var pos = orig.indexOf(' ');
if (pos === -1) {
pos = orig.length;
}
var partial = orig.substr(0, pos);
orig = orig.substr(pos + 1);
if (partial !== '') {
memo.push(partial);
}
return memo;
}, []);
if (partials.length <= opts.nWords) {
var newSentence = new Sentence( partials.join(' '));
collect.push( newSentence );
return collect;
}
var i = 0,
j = 0,
n = partials.length - opts.nWords,
m = partials.length,
tmp;
for (i = 0; i < n; i ++) {
var tmp = ''
for (j = i; j < i+opts.nWords; j ++) {
tmp += partials[j] + ' ';
}
var newSentence = new Sentence(tmp);
collect.push(newSentence);
}
return collect;
}, []);
sentences = nGrams;
}
// Return all sentences whose own `words` map contains the given (stemmed) word.
// An own-property test is used so that inherited members such as `constructor`
// are not reported as present in every sentence.
var containing = function (w) {
    var hasOwn = Object.prototype.hasOwnProperty;
    return _(sentences).filter( function (s) {
        return hasOwn.call( s.words, w );
    });
};
// if summary must exclude words in opts.exclude remove sentences that contain those words
if ( _.isArray(opts.exclude) && opts.exclude.length !== 0) {
var excludes = _(opts.exclude).map( function (w) {
return stemmer(clean(w));
});
sentences = _(sentences).filter( function (s) {
var words = _(s.words).keys();
return (_.intersection( words, excludes ).length === 0);
});
}
/**
 * Return all sentences whose own `words` map contains a given word.
 * An own-property test is used so that inherited members such as
 * `constructor` are not reported as present in every sentence.
 * @param {String} w - word (in the stemmed form used as `words` keys)
 * @return {Array<Object>}
 */
var containing = function (w) {
    var hasOwn = Object.prototype.hasOwnProperty;
    return _(sentences).filter( function (s) {
        return hasOwn.call( s.words, w );
    });
};
var summary = [] ;
var counter = 0;
// If summary must exclude words in opts.exclude remove sentences
// that contain those words.
if ( _.isArray(opts.exclude) && opts.exclude.length !== 0) {
var excludes = _(opts.exclude).map( function (w) {
return stemmer(clean(w));
});
sentences = _(sentences).filter( function (s) {
var words = _(s.words).keys();
return (_.intersection( words, excludes ).length === 0);
});
}
// extract sentences in order of their relevance
while (true) {
var N = sentences.length;
var summary = [];
var counter = 0;
// builds a hash of all words with global frequencies
var words = _(sentences).reduce( function (collect,s) {
_(s.words).each( function (count, w) {
collect[w] = collect[w] ? collect[w] + count : count ;
});
return collect;
}, {});
// if summary must have the words in opts.emphasise
var emphasise = [];
if ( _.isArray(opts.emphasise) && opts.emphasise.length !== 0) {
emphasise = _(opts.emphasise).map( function (w) {
return stemmer(clean(w));
});
}
// Extract sentences in order of their relevance.
while (true) {
var N = sentences.length;
//calculate relevance for each sentence
_(sentences).each( function (s) {
var relevance = _(s.words).reduce( function (memo, freq, w) {
var local = Math.log( 1 + freq );
var global = Math.log( N / containing(w).length );
return memo = memo + (local * global);
}, 0);
// if current sentence contains emphasised words, bump up the relevance
var bump = _.intersection(emphasise, _(s.words).keys()).length;
relevance += bump * 1000; //big enough to push it in front
// Builds a hash of all words with global frequencies.
var words = _(sentences).reduce( function (collect,s) {
_(s.words).each( function (count, w) {
collect[w] = collect[w] ? collect[w] + count : count ;
});
return collect;
}, {});
s.relevance = relevance;
})
// If summary must have the words in opts.emphasise.
var emphasise = [];
if ( _.isArray(opts.emphasise) && opts.emphasise.length !== 0) {
emphasise = _(opts.emphasise).map( function (w) {
return stemmer(clean(w));
});
}
// highest relevance sentence
var highest = _(sentences).max( function (s) {
return s.relevance;
});
// Calculate relevance for each sentence.
_(sentences).each( function (s) {
var relevance = _(s.words).reduce( function (memo, freq, w) {
var local = Math.log(1 + freq);
var global = Math.log(N / containing(w).length);
return memo = memo + (local * global);
}, 0);
// remove words from the remaining sentences to reduce redundancy
sentences = _(sentences).chain()
.without(highest)
.map( function (s) {
_(highest.words).each( function (w) {
s.remove( w );
});
return s;
})
.value();
// If current sentence contains emphasised words,
// bump up the relevance.
var bump = _.intersection(emphasise, _(s.words).keys()).length;
relevance += bump * 1000;
summary.push( highest.orig ) ;
counter += 1;
s.relevance = relevance;
})
var stop = (counter === opts.nSentences || sentences.length === 0);
if (stop) break;
}//~ end while
return {
'summary': summary.join('.'),
'sentences': summary
};
};
return sum;
};
// Highest relevance sentence.
var highest = _(sentences).max( function (s) {
return s.relevance;
});
// Environment detection: builds the `sum` function with `wrapper` and exposes
// it through whichever module system is present.

// exports the `sum` function in node.js
if (typeof exports !== 'undefined' && typeof module !== 'undefined' && module.exports && typeof require !== 'undefined') {
    var stemmer = require( 'porter-stemmer' ).stemmer;
    var _ = require( 'underscore' );
    _.str = require( 'underscore.string' );
    // The library calls underscore.string helpers through `_` chains, so they
    // have to be mixed into underscore first.
    _.mixin( _.str.exports() );
    module.exports = wrapper(_, stemmer);
}
// exports `sum` to AMD module, defining dependencies
else if (typeof define === 'function' && define.amd) {
    define('sum', [
        'underscore',
        'underscore.string',
        'porter-stemmer'
    ], function(_, str, stemmer) {
        // Same mixin as the other branches; without it the chained
        // underscore.string calls used by the library are unavailable on `_`.
        _.str = str;
        _.mixin( str.exports() );
        // NOTE(review): assumes the AMD `porter-stemmer` module value is the
        // stemmer function itself - confirm against that package.
        return wrapper(_, stemmer);
    });
}
// export in browser
else if (typeof this !== 'undefined' && this._ && this.stemmer) {
    this._.mixin( this._.str.exports() );
    this.sum = wrapper(this._, this.stemmer);
}
else {
    throw Error( 'unsupported js environment detected' );
}
// Remove words from the remaining sentences to reduce redundancy.
sentences = _(sentences).chain()
.without(highest)
.map( function (s) {
_(highest.words).each( function (w) {
s.remove( w );
});
return s;
})
.value();
summary.push( highest.orig ) ;
counter += 1;
var stop = (counter === opts.nSentences || sentences.length === 0);
if (stop) break;
}
return {
'summary': summary.join('.'),
'sentences': summary
};
};
return sum;
};
// Environment detection: builds the `sum` function with `wrapper` and exposes
// it through whichever module system is present.

// exports the `sum` function in node.js
if (typeof exports !== 'undefined'
    && typeof module !== 'undefined'
    && module.exports
    && typeof require !== 'undefined') {
    var stemmer = require( 'porter-stemmer' ).stemmer;
    var _ = require( 'underscore' );
    _.str = require( 'underscore.string' );
    // The library calls underscore.string helpers through `_` chains, so they
    // have to be mixed into underscore first.
    _.mixin( _.str.exports() );
    module.exports = wrapper(_, stemmer);
}
// exports `sum` to AMD module, defining dependencies
else if (typeof define === 'function' && define.amd) {
    define('sum', [
        'underscore',
        'underscore.string',
        'porter-stemmer'
    ], function(_, str, stemmer) {
        // Same mixin as the other branches; without it the chained
        // underscore.string calls used by the library are unavailable on `_`.
        _.str = str;
        _.mixin( str.exports() );
        // NOTE(review): assumes the AMD `porter-stemmer` module value is the
        // stemmer function itself - confirm against that package.
        return wrapper(_, stemmer);
    });
}
// export in browser
else if (typeof this !== 'undefined'
    && this._ && this.stemmer) {
    this._.mixin( this._.str.exports() );
    this.sum = wrapper(this._, this.stemmer);
}
else {
    throw Error( 'Unsupported js environment detected' );
}
}).call(this);
// Specs for the options accepted by `sum`: `nSentences`, `exclude` and
// `emphasise`. All of them run against `corpora[1]`.
// NOTE(review): `corpora`, `sum` and `_` come from outside this block -
// presumably loaded by the spec runner; confirm.
describe( 'test sum\' params', function () {
    // `nSentences` controls how many sentences are returned.
    it( 'should return one sentence', function () {
        var corpus = corpora[1];
        var actual = sum({ 'corpus': corpus.text, 'nSentences': 1 });
        var expected = 1;
        expect(actual.sentences.length).toEqual( expected );
    });
    // Title matches what is requested and asserted: three sentences.
    it( 'should return three sentences', function () {
        var corpus = corpora[1];
        var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
        var expected = 3;
        expect(actual.sentences.length).toEqual( expected );
    });
    // Excluding words must change which sentence is selected.
    it( 'should ignore sentences that have the word `bladder` in them', function () {
        var corpus = corpora[1];
        var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'exclude': ['bladder', 'Chubb'] });
        var sum2 = sum({ 'corpus': corpus.text, 'nSentences': 1 });
        expect( sum1.summary ).not.toEqual( sum2.summary );
    });
    // An emphasised word must appear in the one-sentence summary.
    it( 'should have the emphasisted word `drug` in the abstract', function () {
        var corpus = corpora[1];
        var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'emphasise': ['Drug'] });
        var actual = _.str.include( sum1.summary, 'Drug' );
        expect( actual ).toBe( true );
    });
    // `nSentences` controls how many sentences are returned.
    it( 'should return one sentence', function () {
        var corpus = corpora[1];
        var actual = sum({ 'corpus': corpus.text, 'nSentences': 1 });
        var expected = 1;
        expect(actual.sentences.length).toEqual( expected );
    });
    // Title matches what is requested and asserted: three sentences.
    it( 'should return three sentences', function () {
        var corpus = corpora[1];
        var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
        var expected = 3;
        expect(actual.sentences.length).toEqual( expected );
    });
    // Excluding words must change which sentence is selected.
    it( 'should ignore sentences that have the word `bladder` in them', function () {
        var corpus = corpora[1];
        var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'exclude': ['bladder', 'Chubb'] });
        var sum2 = sum({ 'corpus': corpus.text, 'nSentences': 1 });
        expect( sum1.summary ).not.toEqual( sum2.summary );
    });
    // An emphasised word must appear in the one-sentence summary.
    it( 'should have the emphasisted word `drug` in the abstract', function () {
        var corpus = corpora[1];
        var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'emphasise': ['Drug'] });
        var actual = _.str.include( sum1.summary, 'Drug' );
        expect( actual ).toBe( true );
    });
});
// Smoke test: asking for three sentences yields exactly three, for every
// entry of `corpora`. The corpus list is walked twice, so each corpus gets
// two identically named specs.
describe( 'summarize.js basic output test', function () {
    // Registers one spec for a single corpus.
    var registerThreeSentenceSpec = function (corpus) {
        it( 'should calculate the summary', function () {
            var result = sum({ 'corpus': corpus.text, 'nSentences': 3 });
            expect(result.sentences.length).toEqual( 3 );
        });
    };

    corpora.forEach( registerThreeSentenceSpec );
    corpora.forEach( registerThreeSentenceSpec );
});
// Specs for the `nWords` option: for every entry of `corpora`, summarising
// with `nWords: 5` must return at least one result.
describe( 'test nWords params in action', function () {
    corpora.forEach( function (corpus) {
        it( 'should calculate the summary', function () {
            var actual = sum({ 'corpus': corpus.text, 'nWords': 5 });
            // Compare the count with 0: comparing a number against ''
            // could never fail.
            expect(actual.sentences.length).not.toEqual(0);
        });
    });
    corpora.forEach( function (corpus) {
        it( 'should calculate the summary', function () {
            var actual = sum({ 'corpus': corpus.text, 'nWords': 5 });
            // Compare the count with 0: comparing a number against ''
            // could never fail.
            expect(actual.sentences.length).not.toEqual(0);
        });
    });
});
//TODO add tests to validate correctness of the actual output

@@ -10,31 +10,31 @@ var vows = require( 'vows' );

.addBatch({
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nSentences': 3
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nSentences': 3
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
})
.addBatch({
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nWords': 5
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nWords': 5
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
})
.export(module);

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet