@@ -1,121 +0,37 @@
		/* jslint node: true */
		'use strict';

		var _ = require('lodash');
		var BPromise = require('bluebird');
		var debug = require('debug')('softmax');

		var async = BPromise.method;

		var Algorithm = function (options) {
		options = options \|\| {};
		var self = this;
		function Algorithm(options) {
		var opts = options \|\| {};

		if (!(self instanceof Algorithm)) {
		return new Algorithm(options);
		if (!(this instanceof Algorithm)) {
		return new Algorithm(opts);
		}

		var arms = _.isUndefined(options.arms) ? 2 : parseInt(options.arms, 10);
		var gamma = _.isUndefined(options.gamma) ? 1e-7 : parseFloat(options.gamma);
		var tau = _.isUndefined(options.tau) ? null : parseFloat(options.tau);
		var counts = [];
		var values = [];
		debug('init', opts);

		if (arms < 1) {
		this.arms = _.isUndefined(opts.arms) ? 2 : parseInt(opts.arms, 10);
		this.gamma = _.isUndefined(opts.gamma) ? 1e-7 : parseFloat(opts.gamma);
		this.tau = _.isUndefined(opts.tau) ? null : parseFloat(opts.tau);

		if (this.arms < 1) {
		throw new TypeError('invalid arms: cannot be less than 1');
		}
		else if (gamma < 0) {
		} else if (this.gamma < 0) {
		throw new TypeError('invalid gamma: cannot be less than 0');
		}
		else if (!_.isNull(tau) && tau < 0) {
		} else if (!_.isNull(this.tau) && this.tau < 0) {
		throw new TypeError('invalid tau: cannot be less than 0');
		}

		for (var i=0; i<arms; i++) {
		counts.push(0);
		values.push(0);
		}
		this.counts = Array.apply(null, Array(this.arms)).map(Number.prototype.valueOf, 0);
		this.values = Array.apply(null, Array(this.arms)).map(Number.prototype.valueOf, 0);
		}

		var api = {};
		Algorithm.prototype.load = async(require('./lib/load'));
		Algorithm.prototype.reward = async(require('./lib/reward'));
		Algorithm.prototype.select = async(require('./lib/select'));
		Algorithm.prototype.serialize = async(require('./lib/serialize'));

		api.n = 0;

		/**
		 * Restore the algorithm from a previously serialized state, replacing
		 * every setting captured at construction time.
		 *
		 * @param {Object} config - state object with `arms`, `gamma`, `tau`,
		 *   `counts` and `values` (the shape produced by `api.serialize()`)
		 * @returns {Promise<Array>} resolves to the restored per-arm values
		 */
		api.load = function (config) {
			arms = config.arms;
			gamma = config.gamma;
			tau = config.tau;
			counts = config.counts;
			values = config.values;

			// select() derives its annealed temperature from api.n, so the
			// observation total has to follow the restored counts.
			api.n = _.sum(counts);

			return BPromise.resolve(values);
		};

		/**
		 * Record the reward observed for one arm and fold it into that arm's
		 * running mean.
		 *
		 * @param {Number} arm - zero-based index of the arm that was played
		 * @param {Number} reward - observed payoff (0 means no reward)
		 * @returns {Promise<Array>} resolves to the updated per-arm values;
		 *   rejects with a TypeError when `arm` or `reward` is not a number or
		 *   `arm` falls outside 0..arms-1
		 */
		api.reward = function (arm, reward) {
			return new BPromise(function (resolve, reject) {
				if (!_.isNumber(arm)) {
					return reject(new TypeError('missing or invalid required parameter: arm'));
				}
				else if (!_.isNumber(reward)) {
					return reject(new TypeError('missing or invalid required parameter: reward'));
				}
				else if (arm >= arms || arm < 0) {
					// `arms` is a count, not an array: report the highest valid index.
					return reject(new TypeError('invalid arm: ' + arm + ' not in valid range (0-' + (arms - 1) + ')'));
				}

				// Incremental mean: weight the previous estimate by (ct-1)/ct and
				// the new observation by 1/ct.
				var ct = ++counts[arm];
				var pre = values[arm];
				var post = ((ct-1) / ct) * pre + (1/ct) * reward;

				values[arm] = post;

				// Total observations across all arms; seeded with 0 so the fold is
				// well-defined for any counts array.
				api.n = _.reduce(counts, function (sum, ct) {
					return sum + ct;
				}, 0);

				resolve(values);
			});
		};

		/**
		 * Choose an arm by sampling from the softmax distribution over the
		 * current per-arm values.
		 *
		 * @returns {Promise<Number>} resolves to the zero-based index of the
		 *   selected arm
		 */
		api.select = function () {
			return new BPromise(function (resolve) {
				// A falsy tau (null or 0) falls through to the annealed temperature,
				// which shrinks as the observation total api.n grows.
				var temp = tau || 1 / Math.log(api.n + 1 + gamma);

				var weights = values.map(function (v) {
					return Math.exp(v / temp);
				});

				var z = _.sum(weights);

				var probabilities = weights.map(function (w) {
					return w / z;
				});

				var accum = 0;
				var r = _.random(0, 1, true);

				// Start from the last arm: if rounding keeps the cumulative sum from
				// ever exceeding r, the draw still lands on a valid index instead of
				// resolving undefined.
				var arm = probabilities.length ? probabilities.length - 1 : undefined;

				_.forEach(probabilities, function (p, i) {
					accum += p;
					if (accum > r) {
						arm = i;
						return false;
					}
				});

				resolve(arm);
			});
		};

		/**
		 * Produce a plain-object snapshot of the algorithm state.
		 *
		 * @returns {Promise<Object>} resolves to `{arms, gamma, tau, counts, values}`;
		 *   the two arrays are copies, so mutating the snapshot does not touch
		 *   the live state
		 */
		api.serialize = function () {
			var snapshot = {};

			snapshot.arms = arms;
			snapshot.gamma = gamma;
			snapshot.tau = tau;
			snapshot.counts = counts.slice(0);
			snapshot.values = values.slice(0);

			return BPromise.resolve(snapshot);
		};

		return api;
		};


		module.exports = Algorithm;

package.json

		{
		"name": "softmax",
		"description": "A Promises/A+ softmax multi-armed bandit",
		"version": "0.2.1",
		"author": "banditdb",
		"contributors": [
		"kurttheviking"
		],
		"description": "A softmax multi-armed bandit algorithm",
		"version": "0.3.0",
		"license": "ISC",
		"main": "index.js",
		"keywords": [
		"multi-armed bandit",
		"softmax algorithm",
		"promise",
		"promises-a",
		"promises-aplus"
		"promises-aplus",
		"banditlab"
		],
		"main": "index.js",
		"scripts": {
		"test": "node node_modules/mocha/bin/mocha ./test/index"
		"author": {
		"name": "Kurt Ericson",
		"email": "github@kurttheviking.com",
		"url": "http://github.com/kurttheviking"
		},
		"repository": "git@github.com:banditdb/softmax.git",
		"contributors": [
		{
		"name": "Kurt Ericson",
		"email": "github@kurttheviking.com",
		"url": "https://github.com/kurttheviking"
		}
		],
		"repository": {
		"type": "git",
		"url": "git://github.com/kurttheviking/softmax.git"
		},
		"bugs": {
		"url": "https://github.com/kurttheviking/softmax/issues"
		},
		"homepage": "https://github.com/kurttheviking/softmax#readme",
		"dependencies": {
		"bluebird": "2.9.13",
		"lodash": "3.5.0"
		"bluebird": "3.3.0",
		"debug": "2.2.0",
		"lodash": "4.3.0"
		},
		"devDependencies": {
		"chai": "2.1.1",
		"mocha": "2.2.1",
		"sinon": "1.13.0",
		"sinon-chai": "2.7.0"
		"chai": "3.5.0",
		"eslint": "1.10.3",
		"eslint-config-airbnb": "5.0.0",
		"istanbul": "0.4.2",
		"mocha": "2.4.5",
		"sinon": "1.17.3"
		},
		"readmeFilename": "README.md",
		"license": "ISC"
		"scripts": {
		"coverage": "./node_modules/istanbul/lib/cli.js cover --report=json-summary --report=html _mocha ./test -- --recursive",
		"test": "node node_modules/mocha/bin/mocha ./test --recursive"
		}
		}

215

README.md

		@@ -1,40 +0,49 @@
		<a href="http://promisesaplus.com/">
		<img src="http://promisesaplus.com/assets/logo-small.png" alt="Promises/A+ logo" title="Promises/A+ 1.0 compliant" align="right" />
		</a>

		softmax
		================
		=======

		[![Build Status](https://travis-ci.org/banditdb/softmax.svg)](https://travis-ci.org/banditdb/softmax)
		[![Build Status](https://travis-ci.org/kurttheviking/softmax.svg)](https://travis-ci.org/kurttheviking/softmax)

		A Promises/A+, [multi-armed bandit](http://en.wikipedia.org/wiki/Multi-armed_bandit) implemented with a softmax algorithm.
		A softmax algorithm for multi-armed bandit problems

		This implementation is based on [<em>Bandit Algorithms for Website Optimization</em>](http://shop.oreilly.com/product/0636920027393.do) and related empirical research in ["Algorithms for the multi-armed bandit problem"](https://d2w9gswcdc2jtf.cloudfront.net/research/Algorithms+for+the+multi-armed+bandit+problem.pdf).
		This implementation is based on [<em>Bandit Algorithms for Website Optimization</em>](http://shop.oreilly.com/product/0636920027393.do) and related empirical research in ["Algorithms for the multi-armed bandit problem"](https://d2w9gswcdc2jtf.cloudfront.net/research/Algorithms+for+the+multi-armed+bandit+problem.pdf).


		## Specification

		This module conforms to the [BanditLab/1.0 specification](https://github.com/banditlab/spec-js/blob/master/README.md).


		## Quick start

		1. Create a bandit with 3 arms
		First, install this module in your project:

		```
		var Bandit = require('softmax');
		```sh
		npm install softmax --save
		```

		var bandit = new Bandit({
		arms: 3
		Then, use the algorithm:

		1. Create an optimizer with 3 arms and default [annealing](https://en.wikipedia.org/wiki/Simulated_annealing):

		```js
		var Algorithm = require('softmax');

		var algorithm = new Algorithm({
		arms: 3
		});
		```

		2. Select an arm (for exploration or exploitation, according to the algorithm)
		2. Select an arm (for exploration or exploitation, according to the algorithm):

		```
		bandit.select().then(function (arm) {
		console.log('pulled arm=' + arm);
		```js
		algorithm.select().then(function (arm) {
		...
		});
		```

		3. Report the reward earned from a chosen arm
		3. Report the reward earned from a chosen arm:

		```
		bandit.reward(1, 1).then(function (rewards) {
		console.log('arm rewards are currently=' + rewards);
		```js
		algorithm.reward(armId, value).then(function (n) {
		...
		});
		@@ -44,57 +53,44 @@ ```

		## Configuration
		## API

		#### Load the bandit algorithm
		#### `Algorithm([config])`

		Install from npm
		Create a new optimization algorithm.

		```
		npm install softmax --save
		```
		Arguments

		Require in your project
		- `config` (Object, Optional): algorithm instance parameters

		```
		var Bandit = require('softmax');
		```
		The `config` object supports three parameters:

		#### Instantiate a bandit
		- `arms`: (Number:Integer, Optional), default=2, the number of arms over which the optimization will operate
		- `gamma`: the annealing (cooling) factor – defaults to 1e-7 (0.0000001)
		- `tau`: the temperature (scaling) factor – 0 to Infinity, higher leads to more exploration

		This algorithm defaults to 2 arms and gamma (annealing factor) 1e-7
		By default, `gamma` of 1e-7 will cause the algorithm to explore less as more information is received. In this case, the underlying "temperature" is changing. If this behavior is not desired, set `tau` to instead employ an algorithm with a fixed temperature. If `tau` is provided then `gamma` is ignored.

		```
		var bandit = new Bandit();
		```
		Returns

		The constructor accepts an options object that supports three parameters:
		An instance of the softmax optimization algorithm.

		- `arms`: the number of arms over which the bandit can operate
		- `gamma`: the annealing (cooling) factor – defaults to 1e-7 (0.0000001)
		- `tau`: the temperature (scaling) factor – 0 to Infinity, higher leads to more exploration
		Example

		By default, `gamma` of 1e-7 will cause the algorithm to explore less as more information is received. In this case, the underlying "temperature" is changing. If this behavior is not desired, set `tau` to instead employ a softmax algorithm with a fixed temperature. Note that `gamma` has no effect (and is ignored) if `tau` is set.

		```js
		> var Algorithm = require('softmax');
		> var algorithm = new Algorithm();
		> assert.equal(algorithm.arms, 2);
		> assert.equal(algorithm.gamma, 0.0000001);
		```
		var bandit = new Bandit({
		arms: 4,
		gamma: 1e-9
		});
		```

		or
		Or, with a passed `config`:

		```js
		> var Algorithm = require('softmax');
		> var algorithm = new Algorithm({arms: 4, tau: 0.000005});
		> assert.equal(algorithm.arms, 4);
		> assert.equal(algorithm.tau, 0.000005);
		```
		var bandit = new Bandit({
		arms: 4,
		tau: 0.1
		});
		```

		#### `Algorithm#select()`

		## API

		All banditdb algorithms, including this implementation, provide the same Promises/A+ interface.

		#### `bandit.select()`

		Choose an arm to play, according to the specified bandit algorithm.
		@@ -112,6 +108,6 @@

		```
		> var Bandit = require('softmax');
		> var bandit = new Bandit();
		> bandit.select().then(function (arm) { console.log(arm); });
		```js
		> var Algorithm = require('softmax');
		> var algorithm = new Algorithm();
		> algorithm.select().then(function (arm) { console.log(arm); });

		@@ -121,3 +117,3 @@ 0

		#### `bandit.reward(arm, reward)`
		#### `Algorithm#reward(arm, reward)`

		@@ -128,3 +124,3 @@ Inform the algorithm about the payoff from a given arm.

		- `arm` (Integer): the arm index (provided from `bandit.select()`)
		- `arm` (Integer): the arm index (provided from `algorithm.select()`)
		- `reward` (Number): the observed reward value (which can be 0, to indicate no reward)
		@@ -134,25 +130,17 @@

		A promise that resolves to an Array of the current reward state of each arm; each position in the array corresponds to the associated arm index.
		A promise that resolves to a Number representing the count of observed rounds.

		Example

		```
		> var Bandit = require('softmax');
		> var bandit = new Bandit();
		> bandit.reward(0, 1).then(function (rewards) { console.log(rewards); });
		```js
		> var Algorithm = require('softmax');
		> var algorithm = new Algorithm();
		> algorithm.reward(0, 1).then(function (n) { console.log(n); });

		[1, 0]

		> bandit.reward(1, 1).then(function (rewards) { console.log(rewards); });

		[1, 1]

		> bandit.reward(1, 0).then(function (rewards) { console.log(rewards); });

		[1, 0.5]
		1
		```

		#### `bandit.serialize()`
		#### `Algorithm#serialize()`

		Obtain a persistable JSON object representing the internal state of the algorithm.
		Obtain a plain object representing the internal state of the algorithm.

		@@ -169,63 +157,54 @@ Arguments

		```
		> var Bandit = require('softmax');
		> var bandit = new Bandit();
		> bandit.serialize().then(function (state) { console.log(state); });
		```js
		> var Algorithm = require('softmax');
		> var algorithm = new Algorithm();
		> algorithm.serialize().then(function (state) { console.log(state); });

		{
		arms: 2,
		gamma: 0.0000001,
		tau: null,
		counts: [ 0, 0 ],
		values: [ 0, 0 ]
		arms: 2,
		gamma: 0.0000001,
		counts: [0, 0],
		values: [0, 0]
		}
		```

		#### `bandit.load(state)`
		#### `Algorithm#load(state)`

		Restore an instance of a bandit to a previously serialized algorithm state. This method overrides any options parameters passed at instantiation.
		Restore an instance of an algorithm to a previously serialized state. This method overrides any options parameters passed at instantiation.

		Arguments

		- `state` (Object): a serialized algorithm state (provided from `bandit.serialize()`)
		- `state` (Object): a serialized algorithm state (provided from `algorithm.serialize()`)

		Returns

		A promise that resolves to an Array of the current reward state of each arm; each position in the array corresponds to the associated arm index.
		A promise that resolves to a Number representing the count of observed rounds.

		Example

		```
		> var state = { arms: 2, gamma: 0.0000001, tau: null, counts: [ 1, 2 ], values: [ 1, 0.5 ] };
		> var Bandit = require('softmax');
		> var bandit = new Bandit();
		> bandit.load(state).then(function (rewards) { console.log(rewards); });
		```js
		> var state = {arms: 2, gamma: 0.0000001, counts: [1, 2], values: [1, 0.5]};
		> var Algorithm = require('softmax');
		> var algorithm = new Algorithm();
		> algorithm.load(state).then(function (n) { console.log(n); });

		[1, 0.5]
		3
		```

		#### `bandit.n`

		(Number) An instance property representing the total number of recorded reward samples, updated at each `bandit.reward()` call.
		## Tests

		Example
		To run the unit test suite:

		```
		> var Bandit = require('softmax');
		> var bandit = new Bandit();
		> bandit.reward(0, 1).then(function () { console.log(bandit.n); });

		1
		npm test
		```

		Or, to run the test suite and view test coverage:

		## Tests

		To run the full unit test suite

		```sh
		npm run coverage
		```
		npm test
		```

		Tests against stochastic methods (e.g. `bandit.select()`) are inherently tricky to test with deterministic assertions. The approach here is to iterate across a semi-random set of conditions to verify that each run produces valid output. So, strictly speaking, each call to `npm test` is executing a slightly different test suite. At some point, the test suite may be expanded to include a more robust test of the distribution's properties – though because of the number of runs required, would be triggered with an optional flag.
		Note: stochastic methods (e.g. `algorithm.select()`) are inherently tricky to test with deterministic assertions. The approach here is to iterate across a semi-random set of conditions to verify that each run produces valid output. So, strictly speaking, each call to `npm test` executes a slightly different test suite. At some point, the test suite may be expanded to include a more robust test of the distribution's properties – though, because of the number of runs required, it would be triggered with an optional flag.

		@@ -235,3 +214,3 @@

		PRs are welcome! For bugs, please include a failing test which passes when your PR is applied.
		PRs are welcome! For bugs, please include a failing test which passes when your PR is applied. [Travis CI](https://travis-ci.org/kurttheviking/softmax) provides on-demand testing for commits and pull requests.

		@@ -241,4 +220,4 @@

		Currently, this implementation relies on the [native Math.random()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/random) which uses a seeded "random" number generator. In addition, the underlying calculations often encounter extended floating point numbers. Arm selection is therefore subject to JavaScript's floating point precision limitations. For general information about floating point issues see the [floating point guide](http://floating-point-gui.de/).
		Currently, this implementation relies on the [native Math.random()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/random) which uses a seeded "random" number generator. In addition, the underlying calculations often encounter extended floating point numbers. Arm selection is therefore subject to JavaScript's floating point precision limitations. For general information about floating point issues see the [floating point guide](http://floating-point-gui.de).

		While these factors generally do not impede commercial application, I would consider the implementation suspect in any academic setting.
		While these factors generally do not impede common application, I would consider the implementation suspect in an academic setting.

test/index.js

.npmignore

Sorry, the diff of this file is not supported yet

.travis.yml

Sorry, the diff of this file is not supported yet

softmax - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes