New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign inDemoInstall
Socket

sum

Package Overview
Dependencies
Maintainers
1
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sum - npm Package Compare versions

Comparing version 0.1.1 to 0.1.2

.travis.yml

57

package.json
{
"author": "alex.topliceanu <alext@vibetrace.com> (https://github.com/topliceanu)",
"name": "sum",
"description": "text summarization utility",
"version": "0.1.1",
"homepage": "https://github.com/topliceanu/text-summarization",
"repository": {
"type": "git",
"url": "git@github.com:topliceanu/text-summarization.git"
},
"main": "./sum.js",
"engines": {
"node": ">0.4.12"
},
"dependencies": {
"underscore": "~1.3.1",
"underscore.string": "~2.0.0",
"porter-stemmer": "~0.9.1",
"vows": "0.6.1"
}
"name": "sum",
"version": "0.1.2",
"description": "text summarization utility",
"homepage": "https://github.com/topliceanu/text-summarization",
"license": "MIT",
"keywords": [
"summarization",
"nlp",
"stemmer",
"stop-words",
"express"
],
"author": "alex.topliceanu <alexandru.topliceanu@gmail.com> (https://github.com/topliceanu)",
"repository": {
"type": "git",
"url": "git://github.com:topliceanu/text-summarization.git"
},
"bugs": {
"url": "https://github.com/topliceanu/text-summarization/issues"
},
"main": "./sum.js",
"scripts": {
"test": "./node_modules/.bin/vows --spec --isolate ./tests/node/sum.js",
"lint": "./node_modules/.bin/jshint sum.js"
},
"dependencies": {
"underscore": "1.7.0",
"underscore.string": "3.0.3",
"porter-stemmer": "0.9.1"
},
"devDependencies": {
"vows": "0.8.1",
"jshint": "2.6.0"
},
"optionalDependencies": {},
"engines": {
"node": ">0.10.0"
}
}

@@ -1,80 +0,96 @@

_____ _
/ ____| (_)
| (___ _ _ _ __ ___ _ ___
_____ _
/ ____| (_)
| (___ _ _ _ __ ___ _ ___
\___ \ | | | || '_ ` _ \ | |/ __|
____) || |_| || | | | | | _ | |\__ \
|_____/ \__,_||_| |_| |_|(_)| ||___/
_/ |
|__/
_/ |
|__/
Sum.js
============
## Sum.js
[![NPM](https://nodei.co/npm/sum.png?downloads=true&stars=true)](https://nodei.co/npm/sum/)
[![NPM](https://nodei.co/npm-dl/sum.png?months=12)](https://nodei.co/npm-dl/sum/)
| Indicator | |
|:-----------------------|:-------------------------------------------------------------------------|
| continuous integration | [![Build Status](https://travis-ci.org/topliceanu/sum.svg?branch=master)](https://travis-ci.org/topliceanu/sum) |
| dependency management | [![Dependency Status](https://david-dm.org/topliceanu/sum.svg?style=flat)](https://david-dm.org/topliceanu/sum) [![devDependency Status](https://david-dm.org/topliceanu/sum/dev-status.svg?style=flat)](https://david-dm.org/topliceanu/sum#info=devDependencies) |
| change log | [CHANGELOG](https://github.com/topliceanu/sum/blob/master/CHANGELOG.md) [Releases](https://github.com/topliceanu/sum/releases) |
A simple function for summarizing text e.g. for automatically determining the sentences that are most relevant to the context of the corpus.
This library depends on the [underscore](http://documentcloud.github.com/underscore/), [underscore.string](http://epeli.github.com/underscore.string/) and [porter-stemmer](https://github.com/jedp/porter-stemmer) for the moment
This library depends on the [underscore](http://documentcloud.github.com/underscore/), [underscore.string](http://epeli.github.com/underscore.string/) and [porter-stemmer](https://github.com/jedp/porter-stemmer).
Install in node.js
==================
sudo npm install -g sum
## Install in node.js
Install in browser
==================
<script src="/lib/underscore.js"></script>
<script src="/lib/underscore.string.js"></script>
<script src="/lib/porter-stemmer.js"></script>
<script src="/sum.browser.js"></script>
```bash
sudo npm install -g sum
```
Quick Start
===========
var sum = require( 'sum' );
var bigString = "....";
var abstract = sum({ 'corpus': bigString });
## Install in browser
Further Options
===============
var sum = require( 'sum' );
var anotherBigString = "...";
var abstract = sum({
/**
* `corpus`: String - is the string you want to summarize
*/
'corpus': anotherBigString,
```html
<script src="/lib/underscore.js"></script>
<script src="/lib/underscore.string.js"></script>
<script src="/lib/porter-stemmer.js"></script>
<script src="/sum.js"></script>
```
/**
* `nSentences`: Number - controls the number of sentences from the original text included in the abstract
*/
'nSentences': 3,
## Quick Start
/**
* `nWords`: Number - controls the length in words of the nGram output. Output might be larger as some words are ignored in the algorithm but present in the abstract, for ex. prepositions. When `nWords` is set, `nSentences` is ignored
*/
'nWords': 5,
/**
* `exclude`: Array[String] - sum.js allows you to exclude from the final abstract, sentences or nGrams that contain any of the words in the `exclude` param
*/
'exclude': ['polar', 'bear'],
```javascript
var sum = require( 'sum' );
var bigString = "....";
var abstract = sum({ 'corpus': bigString });
// `abstract` is an object w/ format `{"summary":String, "sentences":Array<String>}`
// where summary is the concatenation of the array of sentences.
```
/**
* `emphasise`: Array[String] - forces sum.js to include in the summary the sentences or nGrams that contain any of the words specified by `emphasise` param.
*/
'emphasise': ['magic']
});
## Further Options
```javascript
var sum = require( 'sum' );
var anotherBigString = "...";
var abstract = sum({
/**
* `corpus`: String - is the string you want to summarize
*/
'corpus': anotherBigString,
Running tests
=============
Run /tests/browser/specrunner.html in your favourite browser.
/**
* `nSentences`: Number - controls the number of sentences from the original text included in the abstract
*/
'nSentences': 3,
To run node tests, make sure you have [vows.js](http://vowsjs.org) installed then run
/**
* `nWords`: Number - controls the length in words of the nGram output. Output might be larger as some words are ignored in the algorithm but present in the abstract, for ex. prepositions. When `nWords` is set, `nSentences` is ignored
*/
'nWords': 5,
vows ./tests/node/sum.js
/**
* `exclude`: Array[String] - sum.js allows you to exclude from the final abstract, sentences or nGrams that contain any of the words in the `exclude` param
*/
'exclude': ['polar', 'bear'],
/**
* `emphasise`: Array[String] - forces sum.js to include in the summary the sentences or nGrams that contain any of the words specified by `emphasise` param.
*/
'emphasise': ['magic']
});
Goals
=====
//`abstract` is an object with format {'sentences':Array<String>, 'summary':String} where summary is just the concatenation of the sentences, for convenience.
console.log("The short version of corpus is ", abstract.summary);
```
This library is intended to be fully `embeddable`. Its purpose is to be used primarily on the `client-side`.
## Running tests
Run `/tests/browser/specrunner.html` in your favourite browser.
To run node tests, run `npm run test`.
## Goals
This library is intended to be fully `embeddable`. Its purpose is to be used primarily on the `client-side`.
It should be `self-contained` so no API calls to external services.

@@ -85,5 +101,4 @@ It should be as `light` as possible, both in terms of code size and dependencies and above all it must be `fast`.

TODO
====
## TODO
1. add tests to verify the correctness of the actual output

@@ -97,8 +112,7 @@ 2. currently the output does not preserve the ending chars of the original sentences

Licence
=======
## Licence
(The MIT License)
Copyright (c) 2009-2011 Alex Topliceanu <alext@vibetrace.com>
Copyright (c) Alex Topliceanu <alexandru.topliceanu@gmail.com>

@@ -105,0 +119,0 @@ Permission is hereby granted, free of charge, to any person obtaining

(function (_undef) {
"use strict";
"use strict";
var wrapper = function (_, stemmer) {
//default values
var defaults = {
nSentences: 1,
exclude: [],
emphasise: []
};
// regexes
var sentenceDelimiter = /[.!?;]/;
var nGramDelimiter = /[.,!?;]/;
var wordDelimiter = /\s/mg;
var matchJunk = /["#$%&'()*+,\-\/:<=>@\[\\\]\^_`{|}]/mg ;
/**
* Function wraps the library code to allow passing in the
* dependencies easily.
* @param {Object} _ - Reference to underscore.js
* @param {Object} stemmer - Porter stemmer implementation in js.
* @return {Function} Sumarization function.
*/
var wrapper = function (_, stemmer) {
var stopWords = ["", "a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount", "an", "and", "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as", "at", "back","be","became", "because","become","becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom","but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own","part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", 
"some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thickv", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "the"];
// Params default values.
var defaults = {
nSentences: 1,
exclude: [],
emphasise: []
};
// function used to clean sentences before splitting into words
var clean = function (str) {
return _(str).chain()
.unescapeHTML()
.stripTags()
.clean()
.value()
.replace( matchJunk, '' )
.toLowerCase();
};
// regexes
var sentenceDelimiter = /[.!?;]/;
var nGramDelimiter = /[.,!?;]/;
var wordDelimiter = /\s/mg;
var matchJunk = /["#$%&'()*+,\-\/:<=>@\[\\\]\^_`{|}]/mg ;
// Sentence Module
var Sentence = function (s) {
var c = clean( s );
var all = _.words( c, wordDelimiter );
var words = _(all).chain()
// remove stop words
.filter( function (w) {
return (stopWords.indexOf( w ) === -1) ;
})
// apply stemmer
.map( function (w) {
return stemmer( w );
})
// collect word frequencies
.reduce( function (collect, w) {
collect[w] = collect[w] ? collect[w] + 1 : 1 ;
return collect;
}, {}).value();
// remove a word from this sentence to reduce redundancy in results
var remove = function (w) {
return delete words[w];
};
return {
orig: s,
words: words,
remove: remove
};
};
// List of words which are ignored when computing top relevant sentences.
var stopWords = ["", "a", "about", "above", "above", "across", "after",
"afterwards", "again", "against", "all", "almost", "alone", "along",
"already", "also","although","always","am","among", "amongst",
"amoungst", "amount", "an", "and", "another", "any","anyhow",
"anyone","anything","anyway", "anywhere", "are", "around", "as",
"at", "back","be","became", "because","become","becomes",
"becoming", "been", "before", "beforehand", "behind", "being",
"below", "beside", "besides", "between", "beyond", "bill", "both",
"bottom","but", "by", "call", "can", "cannot", "cant", "co", "con",
"could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
"down", "due", "during", "each", "eg", "eight", "either", "eleven",
"else", "elsewhere", "empty", "enough", "etc", "even", "ever",
"every", "everyone", "everything", "everywhere", "except", "few",
"fifteen", "fify", "fill", "find", "fire", "first", "five", "for",
"former", "formerly", "forty", "found", "four", "from", "front",
"full", "further", "get", "give", "go", "had", "has", "hasnt",
"have", "he", "hence", "her", "here", "hereafter", "hereby",
"herein", "hereupon", "hers", "herself", "him", "himself", "his",
"how", "however", "hundred", "ie", "if", "in", "inc", "indeed",
"interest", "into", "is", "it", "its", "itself", "keep", "last",
"latter", "latterly", "least", "less", "ltd", "made", "many", "may",
"me", "meanwhile", "might", "mill", "mine", "more", "moreover",
"most", "mostly", "move", "much", "must", "my", "myself", "name",
"namely", "neither", "never", "nevertheless", "next", "nine", "no",
"nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere",
"of", "off", "often", "on", "once", "one", "only", "onto", "or",
"other", "others", "otherwise", "our", "ours", "ourselves", "out",
"over", "own","part", "per", "perhaps", "please", "put", "rather",
"re", "same", "see", "seem", "seemed", "seeming", "seems",
"serious", "several", "she", "should", "show", "side", "since",
"sincere", "six", "sixty", "so", "some", "somehow", "someone",
"something", "sometime", "sometimes", "somewhere", "still", "such",
"system", "take", "ten", "than", "that", "the", "their", "them",
"themselves", "then", "thence", "there", "thereafter", "thereby",
"therefore", "therein", "thereupon", "these", "they", "thickv",
"thin", "third", "this", "those", "though", "three", "through",
"throughout", "thru", "thus", "to", "together", "too", "top",
"toward", "towards", "twelve", "twenty", "two", "un", "under",
"until", "up", "upon", "us", "very", "via", "was", "we", "well",
"were", "what", "whatever", "when", "whence", "whenever", "where",
"whereafter", "whereas", "whereby", "wherein", "whereupon",
"wherever", "whether", "which", "while", "whither", "who",
"whoever", "whole", "whom", "whose", "why", "will", "with",
"within", "without", "would", "yet", "you", "your", "yours",
"yourself", "yourselves", "the"];
var sum = function (opts){
/**
* Function used to clean sentences before splitting into words
* @param {String} str
* @return {String}
*/
var clean = function (str) {
return _(str).chain()
.unescapeHTML()
.stripTags()
.clean()
.value()
.replace( matchJunk, '' )
.toLowerCase();
};
// handle options
opts = _.extend( {}, defaults, opts );
opts.corpus = opts.corpus || _undef;
if (opts.corpus === _undef) throw Error( 'No input corpus' );
if (opts.nWords !== _undef && !_.isNumber(opts.nWords)) throw Error('Bad value for nWords');
/**
* Sentence Module. Creates object with format:
* {orig:String, words:Array<String>, remove:Function}
*/
var Sentence = function (s) {
var c = clean( s );
var all = _.words( c, wordDelimiter );
var words = _(all).chain()
// remove stop words
.filter( function (w) {
return (stopWords.indexOf( w ) === -1) ;
})
// apply stemmer
.map( function (w) {
return stemmer( w );
})
// collect word frequencies
.reduce( function (collect, w) {
collect[w] = collect[w] ? collect[w] + 1 : 1 ;
return collect;
}, {}).value();
// remove a word from this sentence to reduce redundancy in results
var remove = function (w) {
return delete words[w];
};
return {
orig: s,
words: words,
remove: remove
};
};
// clean corpus
var s = opts.corpus.split( sentenceDelimiter ); // TODO: keep the sentence ending chars
var sentences = _(s).map( function (s) {
return new Sentence(s);
});
/**
* Text summarization function.
* @param {Object} opts
* @param {String} opts.corpus - String to summarize.
* @param {String} opts.nWords - Number of words the summary should have.
* @param {String} opts.nSentences - Number of sentences the summary should have.
* @return {Object} output
* @return {Array<String>} output.sentences - The summary sentences in
* order of relevance to the input text.
* @return {String} output.summary - the concatenation of the summary
* sentences for convenience.
*/
var sum = function (opts){
// Handle options.
opts = _.extend( {}, defaults, opts );
opts.corpus = opts.corpus || _undef;
if (opts.corpus === _undef) {
throw Error( 'No input corpus' );
}
if (opts.nWords !== _undef && !_.isNumber(opts.nWords)) {
throw Error('Bad value for nWords');
}
// Clean corpus.
var s = opts.corpus.split(sentenceDelimiter);
var sentences = _(s).map( function (s) {
return new Sentence(s);
});
// splits the sentences into nGrams then applies the same algorithm
if (opts.nWords) {
// Splits the sentences into nGrams then applies the same algorithm.
if (opts.nWords) {
// `opts.nSentences` is ignored, output size is determined by lexem size
opts.nSentences = 1;
// `opts.nSentences` is ignored, output size is determined by lexem size.
opts.nSentences = 1;
var nGrams = _(sentences).reduce( function (collect, s) {
var orig = s.orig;
var partials = _(s.words).reduce( function (memo, freq, w) {
var pos = orig.indexOf(' ');
if (pos === -1) pos = orig.length;
var partial = orig.substr(0, pos);
orig = orig.substr(pos + 1);
if (partial !== '') memo.push(partial);
return memo;
}, []);
if (partials.length <= opts.nWords) {
var newSentence = new Sentence( partials.join(' '));
collect.push( newSentence );
return collect;
}
var i = 0, j = 0, n = partials.length - opts.nWords, m=partials.length, tmp;
for (i = 0; i < n; i ++) {
var tmp = ''
for (j = i; j < i+opts.nWords; j ++) {
tmp += partials[j] + ' ';
}
var newSentence = new Sentence( tmp );
collect.push( newSentence );
}
return collect;
}, []);
sentences = nGrams;
}
var nGrams = _(sentences).reduce( function (collect, s) {
var orig = s.orig;
var partials = _(s.words).reduce( function (memo, freq, w) {
var pos = orig.indexOf(' ');
if (pos === -1) {
pos = orig.length;
}
var partial = orig.substr(0, pos);
orig = orig.substr(pos + 1);
if (partial !== '') {
memo.push(partial);
}
return memo;
}, []);
if (partials.length <= opts.nWords) {
var newSentence = new Sentence( partials.join(' '));
collect.push( newSentence );
return collect;
}
var i = 0,
j = 0,
n = partials.length - opts.nWords,
m = partials.length,
tmp;
for (i = 0; i < n; i ++) {
var tmp = ''
for (j = i; j < i+opts.nWords; j ++) {
tmp += partials[j] + ' ';
}
var newSentence = new Sentence(tmp);
collect.push(newSentence);
}
return collect;
}, []);
sentences = nGrams;
}
// return all sentences that contain a givven word
var containing = function (w) {
return _(sentences).filter( function (s) {
return (s.words[w] !== undefined) ;
});
};
// if summary must exclude words in opts.exclude remove sentences that contain those words
if ( _.isArray(opts.exclude) && opts.exclude.length !== 0) {
var excludes = _(opts.exclude).map( function (w) {
return stemmer(clean(w));
});
sentences = _(sentences).filter( function (s) {
var words = _(s.words).keys();
return (_.intersection( words, excludes ).length === 0);
});
}
/**
* Return all sentences that contain a givven word.
* @param {String} w - word
* @return {Array<Object>}
*/
var containing = function (w) {
return _(sentences).filter( function (s) {
return (s.words[w] !== undefined) ;
});
};
var summary = [] ;
var counter = 0;
// If summary must exclude words in opts.exclude remove sentences
// that contain those words.
if ( _.isArray(opts.exclude) && opts.exclude.length !== 0) {
var excludes = _(opts.exclude).map( function (w) {
return stemmer(clean(w));
});
sentences = _(sentences).filter( function (s) {
var words = _(s.words).keys();
return (_.intersection( words, excludes ).length === 0);
});
}
// extract sentences in order of their relevance
while (true) {
var N = sentences.length;
var summary = [];
var counter = 0;
// builds a hash of all words with global frequencies
var words = _(sentences).reduce( function (collect,s) {
_(s.words).each( function (count, w) {
collect[w] = collect[w] ? collect[w] + count : count ;
});
return collect;
}, {});
// if summary must have the words in opts.emphasise
var emphasise = [];
if ( _.isArray(opts.emphasise) && opts.emphasise.length !== 0) {
emphasise = _(opts.emphasise).map( function (w) {
return stemmer(clean(w));
});
}
// Extract sentences in order of their relevance.
while (true) {
var N = sentences.length;
//calculate relevance for each sentence
_(sentences).each( function (s) {
var relevance = _(s.words).reduce( function (memo, freq, w) {
var local = Math.log( 1 + freq );
var global = Math.log( N / containing(w).length );
return memo = memo + (local * global);
}, 0);
// if current sentence containes emphasised words, bumb up the relevance
var bump = _.intersection(emphasise, _(s.words).keys()).length;
relevance += bump * 1000; //big enough to push it in front
// Builds a hash of all words with global frequencies.
var words = _(sentences).reduce( function (collect,s) {
_(s.words).each( function (count, w) {
collect[w] = collect[w] ? collect[w] + count : count ;
});
return collect;
}, {});
s.relevance = relevance;
})
// If summary must have the words in opts.emphasise.
var emphasise = [];
if ( _.isArray(opts.emphasise) && opts.emphasise.length !== 0) {
emphasise = _(opts.emphasise).map( function (w) {
return stemmer(clean(w));
});
}
// highest relevance sentence
var highest = _(sentences).max( function (s) {
return s.relevance;
});
// Calculate relevance for each sentence.
_(sentences).each( function (s) {
var relevance = _(s.words).reduce( function (memo, freq, w) {
var local = Math.log(1 + freq);
var global = Math.log(N / containing(w).length);
return memo = memo + (local * global);
}, 0);
// remove words from the remaining sentences to reduce redundancy
sentences = _(sentences).chain()
.without(highest)
.map( function (s) {
_(highest.words).each( function (w) {
s.remove( w );
});
return s;
})
.value();
// If current sentence containes emphasised words,
// bumb up the relevance.
var bump = _.intersection(emphasise, _(s.words).keys()).length;
relevance += bump * 1000;
summary.push( highest.orig ) ;
counter += 1;
s.relevance = relevance;
})
var stop = (counter === opts.nSentences || sentences.length === 0);
if (stop) break;
}//~ end while
return {
'summary': summary.join('.'),
'sentences': summary
};
};
return sum;
};
// Highest relevance sentence.
var highest = _(sentences).max( function (s) {
return s.relevance;
});
// exports the `sum` function in node.js
if (typeof exports !== 'undefined' && typeof module !== 'undefined' && module.exports && typeof require !== 'undefined') {
var stemmer = require( 'porter-stemmer' ).stemmer;
var _ = require( 'underscore' );
_.str = require( 'underscore.string' );
_.mixin( _.str.exports() );
module.exports = wrapper(_, stemmer);
}
// exports `sum` to AMD module, defining dependencies
else if (typeof define === 'function' && define.amd) {
define('sum', [
'underscore',
'underscore.string',
'porter-stemmer'
], function(_, str, stemmer) {
return wrapper(_, stemmer);
});
}
// export in browser
else if (typeof this !== 'undefined' && this._ && this.stemmer) {
this._.mixin( this._.str.exports() );
this.sum = wrapper(this._, this.stemmer);
}
else {
throw Error( 'unsupported js environment detected' );
}
// Remove words from the remaining sentences to reduce redundancy.
sentences = _(sentences).chain()
.without(highest)
.map( function (s) {
_(highest.words).each( function (w) {
s.remove( w );
});
return s;
})
.value();
summary.push( highest.orig ) ;
counter += 1;
var stop = (counter === opts.nSentences || sentences.length === 0);
if (stop) break;
}
return {
'summary': summary.join('.'),
'sentences': summary
};
};
return sum;
};
// exports the `sum` function in node.js
if (typeof exports !== 'undefined'
&& typeof module !== 'undefined'
&& module.exports
&& typeof require !== 'undefined') {
var stemmer = require( 'porter-stemmer' ).stemmer;
var _ = require( 'underscore' );
_.str = require( 'underscore.string' );
_.mixin( _.str.exports() );
module.exports = wrapper(_, stemmer);
}
// exports `sum` to AMD module, defining dependencies
else if (typeof define === 'function' && define.amd) {
define('sum', [
'underscore',
'underscore.string',
'porter-stemmer'
], function(_, str, stemmer) {
return wrapper(_, stemmer);
});
}
// export in browser
else if (typeof this !== 'undefined'
&& this._ && this.stemmer) {
this._.mixin( this._.str.exports() );
this.sum = wrapper(this._, this.stemmer);
}
else {
throw Error( 'Unsupported js environment detected' );
}
}).call(this);
describe( 'test sum\' params', function () {
it( 'should return one sentence', function () {
var corpus = corpora[1];
var actual = sum({ 'corpus': corpus.text, 'nSentences': 1 });
var expected = 1;
expect(actual.sentences.length).toEqual( expected );
});
it( 'should return two sentences', function () {
var corpus = corpora[1];
var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
var expected = 3;
expect(actual.sentences.length).toEqual( expected );
});
it( 'should ignore sentences that have the word `bladder` in them', function () {
var corpus = corpora[1];
var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'exclude': ['bladder', 'Chubb'] });
var sum2 = sum({ 'corpus': corpus.text, 'nSentences': 1 });
expect( sum1.summary ).not.toEqual( sum2.summary );
});
it( 'should have the emphasisted word `drug` in the abstract', function () {
var corpus = corpora[1];
var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'emphasise': ['Drug'] });
var actual = _.str.include( sum1.summary, 'Drug' );
expect( actual ).toBe( true );
});
it( 'should return one sentence', function () {
var corpus = corpora[1];
var actual = sum({ 'corpus': corpus.text, 'nSentences': 1 });
var expected = 1;
expect(actual.sentences.length).toEqual( expected );
});
it( 'should return two sentences', function () {
var corpus = corpora[1];
var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
var expected = 3;
expect(actual.sentences.length).toEqual( expected );
});
it( 'should ignore sentences that have the word `bladder` in them', function () {
var corpus = corpora[1];
var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'exclude': ['bladder', 'Chubb'] });
var sum2 = sum({ 'corpus': corpus.text, 'nSentences': 1 });
expect( sum1.summary ).not.toEqual( sum2.summary );
});
it( 'should have the emphasisted word `drug` in the abstract', function () {
var corpus = corpora[1];
var sum1 = sum({ 'corpus': corpus.text, 'nSentences': 1, 'emphasise': ['Drug'] });
var actual = _.str.include( sum1.summary, 'Drug' );
expect( actual ).toBe( true );
});
});
describe( 'summarize.js basic output test', function () {
corpora.forEach( function (corpus) {
it( 'should calculate the summary', function () {
var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
var expected = 3;
expect(actual.sentences.length).toEqual( expected );
});
});
corpora.forEach( function (corpus) {
it( 'should calculate the summary', function () {
var actual = sum({ 'corpus': corpus.text, 'nSentences': 3 });
var expected = 3;
expect(actual.sentences.length).toEqual( expected );
});
});
});
describe( 'test nWords params in action', function () {
corpora.forEach( function (corpus) {
it( 'should calculate the summary', function () {
var actual = sum({ 'corpus': corpus.text, 'nWords': 5 });
expect(actual.sentences.length).not.toEqual('');
});
});
corpora.forEach( function (corpus) {
it( 'should calculate the summary', function () {
var actual = sum({ 'corpus': corpus.text, 'nWords': 5 });
expect(actual.sentences.length).not.toEqual('');
});
});
});
//TODO add tests to validate correctness of the actual output

@@ -10,31 +10,31 @@ var vows = require( 'vows' );

.addBatch({
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nSentences': 3
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nSentences': 3
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
})
.addBatch({
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nWords': 5
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
'when summarizing a text': {
topic: function () {
var s = sum({
'corpus': corpus,
'nWords': 5
});
return s.summary;
},
'it should output the abstract containing the most relevant sentences for the meaning of the initial text': function (error, summary) {
assert.ifError( error );
assert.isString( summary );
}
}
})
.export(module);

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc