markov-strings
Advanced tools
Comparing version 1.2.0 to 1.3.0
109
index.js
'use strict'; | ||
const _ = require('lodash'); | ||
const _ = require('lodash'), | ||
debug = require('debug')('markov-strings'); | ||
class Generator { | ||
/** | ||
* Constructor | ||
* @param data An array of strings or objects. If 'data' is an array of objects, each object must have a 'string' attribute | ||
* @param options An object of options. If not set, sensible defaults will be used. | ||
*/ | ||
constructor(data, options) { | ||
this.data = data; | ||
this.data = this.formatData(data); | ||
this.corpus = undefined; | ||
@@ -28,2 +35,19 @@ this.startWords = []; | ||
formatData(data) { | ||
if (_.isString(data[0])) { | ||
// If data is an array of strings, wrap them into objects | ||
const newData = []; | ||
data.forEach(string => { | ||
newData.push({ | ||
string: string | ||
}) | ||
}) | ||
return newData; | ||
} | ||
if (!data[0].hasOwnProperty('string')) { | ||
throw new Error('Objects in your corpus must have a "string" property'); | ||
} | ||
return data; | ||
} | ||
buildCorpus() { | ||
@@ -39,3 +63,4 @@ return new Promise((resolve, reject) => { | ||
this.corpus = {}; | ||
this.data.forEach(line => { | ||
this.data.forEach(item => { | ||
const line = item.string; | ||
const words = line.split(' '); | ||
@@ -45,4 +70,7 @@ | ||
const start = _.slice(words, 0, options.stateSize).join(' '); | ||
if (!_.includes(this.startWords, start)) { | ||
this.startWords.push(start); | ||
const oldStartObj = _.find(this.startWords, o => o.words == start); | ||
if (oldStartObj) { | ||
if (!_.includes(oldStartObj.refs, item)) { oldStartObj.refs.push(item); } | ||
} else { | ||
this.startWords.push({ words: start, refs: [item] }); | ||
} | ||
@@ -52,4 +80,7 @@ | ||
const end = _.slice(words, words.length - options.stateSize, words.length).join(' '); | ||
if (!_.includes(this.endWords, end)) { | ||
this.endWords.push(end); | ||
const oldEndObj = _.find(this.endWords, o => o.words == end); | ||
if (oldEndObj) { | ||
if (!_.includes(oldEndObj.refs, item)) { oldEndObj.refs.push(item); } | ||
} else { | ||
this.endWords.push({ words: end, refs: [item] }); | ||
} | ||
@@ -67,9 +98,12 @@ | ||
if (this.corpus.hasOwnProperty(curr)) { | ||
if (!_.includes(this.corpus[curr], next)) { | ||
this.corpus[curr].push(next); | ||
// If corpus already owns this chain | ||
const oldObj = _.find(this.corpus[curr], o => o.words == next); | ||
if (oldObj) { | ||
oldObj.refs.push(item); | ||
} else { | ||
this.corpus[curr].push({ words: next, refs: [item] }) | ||
} | ||
} else { | ||
this.corpus[curr] = [{ words: next, refs: [item] }]; | ||
} | ||
else { | ||
this.corpus[curr] = [next]; | ||
} | ||
} | ||
@@ -82,5 +116,5 @@ }); | ||
try { | ||
resolve(this.generateSentenceSync(options)) | ||
} | ||
catch (e) { | ||
const result = this.generateSentenceSync(options); | ||
resolve(result); | ||
} catch (e) { | ||
reject(e); | ||
@@ -91,9 +125,9 @@ } | ||
generateSentenceSync(options) { | ||
generateSentenceSync(options = {}) { | ||
if (!this.corpus) { | ||
throw new Error('Corpus is not built.') | ||
} | ||
options = options ? options : {}; | ||
_.assignIn(this.options, options); | ||
options = this.options; | ||
const newOptions = {}; | ||
_.assignIn(newOptions, this.options, options); | ||
options = newOptions; | ||
@@ -111,4 +145,4 @@ const corpus = _.cloneDeep(this.corpus); | ||
while (true) { | ||
const key = arr[arr.length - 1]; // Last value in array | ||
const state = _.sample(corpus[key]); | ||
const block = arr[arr.length - 1]; // Last value in array | ||
const state = _.sample(corpus[block.words]); | ||
@@ -124,6 +158,6 @@ // Sentence cannot be finished | ||
// Increment score | ||
score += corpus[key].length - 1; // Increment score | ||
score += corpus[block.words].length - 1; // Increment score | ||
// Is sentence finished? | ||
if (_.includes(this.endWords, state)) { | ||
if (_.some(this.endWords, { words: state.words })) { | ||
ended = true; | ||
@@ -133,15 +167,14 @@ break; | ||
} | ||
const scorePerWord = parseInt(score/arr.length); | ||
const scorePerWord = parseInt(score / arr.length); | ||
const sentence = arr.join(' ').trim(); | ||
const sentence = _.map(arr, 'words').join(' ').trim(); | ||
// Sentence is not ended or incorrect | ||
if ( | ||
!ended | ||
|| typeof options.checker === 'function' && !options.checker(sentence) | ||
|| options.minWords > 0 && sentence.split(' ').length < options.minWords | ||
|| options.maxWords > 0 && sentence.split(' ').length > options.maxWords | ||
|| options.maxLength > 0 && sentence.length > options.maxLength | ||
|| score < options.minScore | ||
|| scorePerWord < options.minScorePerWord | ||
if (!ended || | ||
typeof options.checker === 'function' && !options.checker(sentence) || // checker cb returns false | ||
options.minWords > 0 && sentence.split(' ').length < options.minWords || | ||
options.maxWords > 0 && sentence.split(' ').length > options.maxWords || | ||
options.maxLength > 0 && sentence.length > options.maxLength || | ||
score < options.minScore || | ||
scorePerWord < options.minScorePerWord | ||
) { | ||
@@ -151,5 +184,9 @@ continue; | ||
return {string: sentence, score: score, scorePerWord: scorePerWord}; | ||
return { | ||
string: sentence, | ||
score: score, | ||
scorePerWord: scorePerWord, | ||
refs: _.uniqBy(_.flatten(_.map(arr, 'refs')), 'string') | ||
}; | ||
} | ||
throw new Error('Cannot build sentence with current corpus and options'); | ||
@@ -159,2 +196,2 @@ } | ||
module.exports = Generator; | ||
module.exports = Generator; |
{ | ||
"name": "markov-strings", | ||
"version": "1.2.0", | ||
"version": "1.3.0", | ||
"description": "A Markov string generator", | ||
@@ -34,4 +34,5 @@ "main": "index.js", | ||
"dependencies": { | ||
"debug": "^2.3.2", | ||
"lodash": "^4.13.1" | ||
} | ||
} |
[![Build Status](https://travis-ci.org/scambier/markov-strings.svg?branch=master)](https://travis-ci.org/scambier/markov-strings) | ||
[![Coverage Status](https://coveralls.io/repos/github/scambier/markov-strings/badge.svg?branch=master)](https://coveralls.io/github/scambier/markov-strings?branch=master) | ||
[![npm version](https://badge.fury.io/js/markov-strings.svg)](https://badge.fury.io/js/markov-strings) | ||
[![npm version](https://badge.fury.io/js/markov-strings.svg)](https://badge.fury.io/js/markov-strings) [![dep](https://david-dm.org/scambier/markov-strings.svg)](https://david-dm.org/scambier/markov-strings#info=devDependencies) | ||
@@ -26,2 +26,3 @@ <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 --> | ||
- [options](#options) | ||
- [Changelog](#changelog) | ||
- [Running the tests](#running-the-tests) | ||
@@ -90,3 +91,5 @@ | ||
string: 'lorem ipsum dolor sit amet etc. https://github.com/scambier/markov-strings', | ||
score: 42 | ||
score: 42, | ||
scorePerWord: 6, | ||
refs: [ an array of objects ] | ||
} | ||
@@ -104,4 +107,7 @@ */ | ||
`data` is an array of strings (sentences). The bigger the array, the better and more varied the results. | ||
`data` is an array of strings (sentences), or an array of objects. If you wish to use objects, each one must have a `string` attribute. The bigger the array, the better and more varied the results. | ||
Examples: | ||
`[ 'lorem ipsum', 'dolor sit amet' ]` or `[ { string: 'lorem ipsum', attr: 'value' }, { string: 'dolor sit amet', attr: 'other value' } ]` | ||
#### options | ||
@@ -121,2 +127,4 @@ Type: `object` | ||
Note: this option cannot be used in `generateSentence()` | ||
The number of words for each state. | ||
@@ -181,5 +189,7 @@ `1` will output gibberish sentences without much sense. | ||
#### markov.generateSentence([options]) | ||
Return a Promise that will resolve to an object `{string, score}` | ||
Return a Promise that will resolve to an object `{string, score, scorePerWord, refs}` | ||
Synced function: `markov.generateSentenceSync()` | ||
The `refs` array contains all the objects that were used to build the sentence. This can be useful for fetching metadata or computing statistics. | ||
##### options | ||
@@ -190,3 +200,10 @@ Type: `object` | ||
## Changelog | ||
### 1.3.0 | ||
- New feature: the generator now accepts arrays of objects, and tells the user which objects were used to build a sentence | ||
- Fixed all unit tests | ||
- Added a changelog | ||
## Running the tests | ||
`npm test` |
177
test/test.js
'use strict'; | ||
const expect = require('chai').expect; | ||
const expect = require('chai').expect, | ||
_ = require('lodash'); | ||
@@ -19,8 +20,8 @@ const data = [ | ||
beforeEach(function(done) { | ||
beforeEach(function (done) { | ||
generator.buildCorpus().then(done); | ||
}); | ||
describe('Options parser', function() { | ||
it('should have the right values', function() { | ||
describe('Options parser', function () { | ||
it('should have the right values', function () { | ||
expect(generator.options.stateSize).to.equal(2); | ||
@@ -30,15 +31,17 @@ }); | ||
describe('Generator builder', function() { | ||
describe('In buildCorpus', function () { | ||
describe('StartWords array', function() { | ||
it('should contain the right values', function() { | ||
describe('StartWords array', function () { | ||
it('should contain the right values', function () { | ||
const start = generator.startWords; | ||
expect(start).to.contain('Lorem ipsum'); | ||
expect(start).to.contain('Consectetur adipiscing'); | ||
expect(start).to.contain('Quisque tempor,'); | ||
expect(start).to.contain('Justo nisi'); | ||
expect(start).to.contain('Egestas bibendum'); | ||
expect(_.some(start, { words: 'Lorem ipsum' })).to.be.true; | ||
expect(_.some(start, { words: 'Consectetur adipiscing' })).to.be.true; | ||
expect(_.some(start, { words: 'Quisque tempor,' })).to.be.true; | ||
expect(_.some(start, { words: 'Justo nisi' })).to.be.true; | ||
expect(_.some(start, { words: 'Egestas bibendum' })).to.be.true; | ||
expect(_.some(start, { words: 'fringilla dui' })).to.be.true; | ||
expect(_.some(start, { words: 'Fusce tincidunt' })).to.be.true; | ||
}); | ||
it('should have the right length', function() { | ||
it('should have the right length', function () { | ||
expect(generator.startWords).to.have.lengthOf(7); | ||
@@ -48,30 +51,30 @@ }); | ||
describe('EndWords array', function() { | ||
it('should have the right length', function() { | ||
describe('EndWords array', function () { | ||
it('should have the right length', function () { | ||
expect(generator.endWords).to.have.lengthOf(7); | ||
}); | ||
it('should contain the right values', function() { | ||
it('should contain the right values', function () { | ||
const end = generator.endWords; | ||
expect(end).to.contain('sit amet'); | ||
expect(end).to.contain('start words'); | ||
expect(end).to.contain('adipiscing elit'); | ||
expect(end).to.contain('fringilla dui'); | ||
expect(end).to.contain('ut lacus'); | ||
expect(end).to.contain('est rien…'); | ||
expect(_.some(end, { words: 'sit amet' })).to.be.true; | ||
expect(_.some(end, { words: 'start words' })).to.be.true; | ||
expect(_.some(end, { words: 'adipiscing elit' })).to.be.true; | ||
expect(_.some(end, { words: 'fringilla dui' })).to.be.true; | ||
expect(_.some(end, { words: 'ut lacus' })).to.be.true; | ||
expect(_.some(end, { words: 'est rien…' })).to.be.true; | ||
}); | ||
}); | ||
describe('Corpus', function() { | ||
it('should have the right values for the right keys', function() { | ||
describe('Corpus', function () { | ||
it('should have the right values for the right keys', function () { | ||
const corpus = generator.corpus; | ||
expect(corpus['Lorem ipsum']).to.contain('dolor sit'); | ||
expect(corpus['Lorem ipsum']).to.contain('duplicate start'); | ||
expect(corpus['tempor, erat']).to.contain('vel lacinia'); | ||
expect(_.some(corpus['Lorem ipsum'], { words: 'dolor sit' })).to.be.true; | ||
expect(_.some(corpus['Lorem ipsum'], { words: 'duplicate start' })).to.be.true; | ||
expect(_.some(corpus['tempor, erat'], { words: 'vel lacinia' })).to.be.true; | ||
}); | ||
}); | ||
describe('Options', function() { | ||
it('should take given options into account', function() { | ||
const generator = new Generator([], {maxTries: 2}); | ||
describe('Options', function () { | ||
it('should take given options into account', function () { | ||
const generator = new Generator(['lorem'], { maxTries: 2 }); | ||
expect(generator.options.maxTries).to.equal(2); | ||
@@ -82,5 +85,5 @@ }) | ||
describe('Sentence generator', function() { | ||
describe('Sentence generator', function () { | ||
it('should throw an error if corpus is not built', function() { | ||
it('should throw an error if corpus is not built', function () { | ||
const generator = new Generator(data); | ||
@@ -92,25 +95,20 @@ expect(() => { | ||
it('should output a sentence', function() { | ||
generator.generateSentence({stateSize: 1}) | ||
it('should output a sentence', function (done) { | ||
generator.generateSentence({ stateSize: 1 }) | ||
.then(result => { | ||
expect(result).to.exist; | ||
done(); | ||
}); | ||
}); | ||
it('should throw an error when a sentence cannot be built', function() { | ||
it('should end with a value from endWords', function (done) { | ||
for (let i = 0; i < 10; i++) { | ||
generator.generateSentence({stateSize: 3}) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
}); | ||
} | ||
}); | ||
it('should end with a value from endWords', function() { | ||
for (let i = 0; i < 10; i++) { | ||
generator.generateSentence() | ||
.then(result => { | ||
const arr = result.split(' '); | ||
const arr = result.string.split(' '); | ||
const end = arr.slice(arr.length - 2, arr.length); | ||
expect(generator.endWords).to.contain(end.join(' ')); | ||
expect(_.map(generator.endWords, 'words')).to.contain(end.join(' ')); | ||
if (i === 9) { | ||
done(); | ||
} | ||
}); | ||
@@ -120,65 +118,78 @@ } | ||
it('should reject the sentence', function() { | ||
const options = {minWords: 5, maxTries: 10}; | ||
generator.generateSentence(options, result => result.split(' ').length < 5) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
}); | ||
it('should reject the sentence', function (done) { | ||
const options = { | ||
minWords: 5, | ||
maxTries: 10, | ||
checker: result => result.string.split(' ').length < 5 | ||
}; | ||
generator.generateSentence(options) | ||
.catch(e => { | ||
expect(e).to.be.an('error'); | ||
done(); | ||
}) | ||
}); | ||
it('should accept the sentence', function() { | ||
generator.generateSentence({}, result => true) | ||
.then(result => { | ||
expect(result).to.exist; | ||
it('should reject because maxLength is unattainable', function (done) { | ||
generator.generateSentence({ maxTries: 100, maxLength: 1, minWords: 0, maxWords: 0 }) | ||
.catch(e => { | ||
expect(e).to.be.an('error'); | ||
done(); | ||
}); | ||
}); | ||
it('should reject because maxLength is unattainable', function() { | ||
generator.generateSentence({maxTries: 100, maxLength: 1, minWords: 0, maxWords: 0}) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
it('should reject because minWords is unattainable', function (done) { | ||
generator.generateSentence({ maxTries: 100, minWords: 100 }) | ||
.catch(e => { | ||
expect(e).to.be.an('error'); | ||
done(); | ||
}); | ||
}); | ||
it('should reject because minWords is unattainable', function() { | ||
generator.generateSentence({maxTries: 100, minWords: 100}) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
it('should reject because minScore is unattainable', function (done) { | ||
generator.generateSentence({ maxTries: 100, minScore: 20 }) | ||
.catch(e => { | ||
expect(e).to.be.an('error'); | ||
done(); | ||
}); | ||
}); | ||
it('should reject because minScore is unattainable', function() { | ||
generator.generateSentence({maxTries: 100, minScore: 20}) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
it('should reject because maxWords is unattainable', function (done) { | ||
generator.generateSentence({ maxTries: 100, maxWords: 1, minWords: 0 }) | ||
.catch(e => { | ||
expect(e).to.be.an('error'); | ||
done(); | ||
}); | ||
}); | ||
it('should reject because maxWords is unattainable', function() { | ||
generator.generateSentence({maxTries: 100, maxWords: 1, minWords: 0}) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
it('should reject all sentences because of the callback', function (done) { | ||
generator.generateSentence({ | ||
maxTries: 100, | ||
checker: result => false | ||
}) | ||
.catch(e => { | ||
expect(e).to.be.an('error'); | ||
done(); | ||
}); | ||
}); | ||
it('should reject all sentences because of the callback', function() { | ||
it('should accept all sentences because of the callback', function (done) { | ||
generator.generateSentence({ | ||
maxTries: 100, | ||
callback: sentence => { | ||
return false; | ||
} | ||
checker: result => true | ||
}) | ||
.then(result => { | ||
expect(result).to.throw(Error); | ||
expect(result).to.exist; | ||
done(); | ||
}); | ||
}); | ||
it('should accept all sentences because of the callback', function() { | ||
it('should return an object with all attributes', function (done) { | ||
generator.generateSentence({ | ||
callback: sentence => { | ||
return true; | ||
} | ||
checker: result => true | ||
}) | ||
.then(result => { | ||
expect(result).to.exist; | ||
expect(result.string).to.exist; | ||
expect(result.score).to.exist; | ||
expect(result.scorePerWord).to.exist; | ||
expect(result.refs).to.exist; | ||
done(); | ||
}); | ||
@@ -185,0 +196,0 @@ }); |
19529
326
204
2
+ Added debug@^2.3.2
+ Added debug@2.6.9 (transitive)
+ Added ms@2.0.0 (transitive)