Comparing version 0.1.0 to 0.2.0
@@ -38,5 +38,5 @@ /** | ||
Object.defineProperty(bothan, 'version', { | ||
value: '0.1.0' | ||
value: '0.2.0' | ||
}); | ||
module.exports = bothan; |
{ | ||
"name": "bothan", | ||
"version": "0.1.0", | ||
"description": "A node.js phantom controller for scraping purposes.", | ||
"version": "0.2.0", | ||
"description": "A node.js phantom interface for scraping purposes.", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "gulp test" | ||
"test": "mocha -R spec ./test/endpoint.js", | ||
"lint": "jshint src && jshint test && jshint shared && jshint phantom" | ||
}, | ||
@@ -27,14 +28,10 @@ "repository": { | ||
"async": "^0.9.0", | ||
"estafet": "git+https://github.com/Yomguithereal/estafet.git", | ||
"lodash": "^2.4.1", | ||
"events": "^1.0.2", | ||
"phantomjs": "~1.9.10", | ||
"uuid": "~2.0.1", | ||
"ws": "~0.4.32" | ||
"ws": "^0.7.0" | ||
}, | ||
"devDependencies": { | ||
"gulp": "^3.8.9", | ||
"gulp-jshint": "^1.8.6", | ||
"gulp-mocha": "^1.1.1", | ||
"mocha": "~1.21.4" | ||
"jshint": "^2.6.0", | ||
"mocha": "~2.1.0" | ||
} | ||
} |
@@ -8,5 +8,5 @@ /** | ||
*/ | ||
var Messenger = require('estafet'), | ||
helpers = require('../shared/helpers.js'), | ||
config = require('../shared/config.js'); | ||
var helpers = require('../shared/helpers.js'), | ||
config = require('../shared/config.js'), | ||
EventEmitter = require('events').EventEmitter; | ||
@@ -27,6 +27,6 @@ // Helpers | ||
this.connected = false; | ||
this.name = null; | ||
this.ee = new EventEmitter(); | ||
// Methods | ||
// TODO: implement a timeout | ||
// TODO: implement a phantom-side timeout | ||
this.setup = function(params, next) { | ||
@@ -42,23 +42,35 @@ params = params || {}; | ||
// Creating messenger | ||
self.messenger = new Messenger(params.name, { | ||
receptor: function(callback) { | ||
self.ws.onmessage = function(msg) { | ||
callback(JSON.parse(msg.data)); | ||
}; | ||
}, | ||
emitter: function(data) { | ||
self.ws.send(JSON.stringify(data)); | ||
} | ||
// Event delegation | ||
function delegate(name) { | ||
return self.ee[name].bind(self.ee); | ||
} | ||
self.ws.addEventListener('message', function(msg) { | ||
var parsedMsg = JSON.parse(msg.data); | ||
self.ee.emit(parsedMsg.head, parsedMsg); | ||
}); | ||
self.parent = self.messenger.conversation('Spynet'); | ||
// Constructing parent abstraction | ||
self.parent = { | ||
request: helpers.request.bind(null, self.ws), | ||
send: function(head, body) { | ||
self.ws.send(JSON.stringify({ | ||
from: params.name, | ||
head: head, | ||
body: body | ||
})); | ||
}, | ||
replyTo: helpers.replyTo.bind(null, self.ws), | ||
on: delegate('on'), | ||
once: delegate('once'), | ||
removeListener: delegate('removeListener') | ||
}; | ||
// Perform tricks here | ||
// NOTE: executing binding here to avoid racing conditions | ||
if (params.bindings) | ||
require(params.bindings)(self.parent, params.data); | ||
// Performing handshake | ||
self.parent.request('handshake', next); | ||
// Handshake | ||
self.parent.request('handshake', {from: params.name}, function(err) { | ||
if (err) | ||
return next(err); | ||
else | ||
return next(null, self.parent); | ||
}); | ||
}; | ||
@@ -65,0 +77,0 @@ }; |
@@ -12,2 +12,6 @@ /** | ||
// Injecting polyfills | ||
if (phantom.version.major < 2) | ||
require('./polyfills.js'); | ||
// Checking CLI arguments so we get proper configuration from parent | ||
@@ -25,3 +29,3 @@ var params = JSON.parse(args[1]); | ||
// Setup the comlink | ||
comlink.setup(params, function(err) { | ||
comlink.setup(params, function(err, parentObject) { | ||
@@ -31,2 +35,6 @@ // If the socket server timed out, we exit | ||
return phantom.exit(1); | ||
// Executing bindings | ||
if (params.bindings) | ||
require(params.bindings)(parentObject, params.data); | ||
}); |
187
README.md
# Bothan | ||
**bothan.js** is a low-level [phantomjs](http://phantomjs.org/) controller that can be used with node.js and was initially intended to perform scraping tasks. | ||
This controller is used by [sandcrawler](https://github.com/medialab/sandcrawler) to perform its dynamic scraping tasks. | ||
## Installation | ||
You can install **bothan.js** with npm. Note that by default, the library will install a correct version of phantomjs thanks to this [package](https://www.npmjs.com/package/phantomjs). | ||
```bash | ||
npm install bothan | ||
``` | ||
Or if you need the latest development version: | ||
```bash | ||
npm install git+https://github.com/medialab/bothan.git | ||
``` | ||
## Concept | ||
**bothan.js** communicates with its phantom child processes through a websocket server. | ||
It does so without needing to access a dummy webpage on the phantom side, since phantom's main JavaScript context is perfectly able to handle websockets. | ||
This dramatically enhances the stability of the communication between node and phantom children. | ||
## Bindings | ||
However, bothan only provides a simple way to spawn phantoms and to communicate with them. So, if you want to be able to send messages to your phantoms and have them react accordingly, you must pass bindings to them. | ||
Bindings are just expressed in a script written thusly: | ||
```js | ||
module.exports = function(parent, params) { | ||
// Hello | ||
parent.on('hello', function() { | ||
console.log('Hello world!'); | ||
}); | ||
}; | ||
``` | ||
## Usage | ||
### Deploying a phantom | ||
```js | ||
var bothan = require('bothan'); | ||
bothan.deploy(function(err, phantom) { | ||
phantom.send('message', {hello: 'world'}); | ||
}); | ||
// With parameters | ||
bothan.deploy({path: './bin/customphantomjs'}, function(err, phantom) { | ||
//... | ||
}); | ||
``` | ||
### Methods | ||
#### Send | ||
Sends a message to the phantom child to receive. | ||
```js | ||
phantom.send(head, body); | ||
``` | ||
#### Request | ||
Request something from the phantom child. | ||
```js | ||
phantom.request(head, body, params, function(err, response) { | ||
// Deal with error | ||
if (err) | ||
// ... | ||
// Handle response | ||
console.log(response); | ||
}); | ||
// Alternate signatures | ||
phantom.request(head, callback); | ||
phantom.request(head, body, callback); | ||
// Cancel a request | ||
var call = phantom.request(...); | ||
call.cancel(); | ||
``` | ||
*Parameters*: | ||
* **timeout** *?integer* [`2000`]: time in milliseconds before the request times out. | ||
#### ReplyTo | ||
Reply to one side's request. | ||
```js | ||
phantom.replyTo(id, data); | ||
``` | ||
#### Kill | ||
Kill a phantom child. | ||
```js | ||
phantom.kill(); | ||
``` | ||
#### Restart | ||
Restarting a phantom child. | ||
```js | ||
phantom.restart(); | ||
``` | ||
### Events | ||
Phantom children wrappers as offered by **bothan.js** are event emitters so you can listen to various events. | ||
*Events* | ||
* **ready**: fires when the phantom child is ready or ready again (specially after a restart). | ||
* **log**: fires when the phantom child logs something to stdout. | ||
* **error**: fires when the phantom child prints an error or outputs to stderr. | ||
* **close**: fires when the phantom child closes. | ||
* **crash**: fires when the phantom child crashes. | ||
* **?anyMessage?**: fires when the phantom child emits a message through its web socket. | ||
*Example* | ||
Note that event emitting is done through node's core events [module](http://nodejs.org/api/events.html). | ||
```js | ||
phantom.on('crash', function() { | ||
console.log('Phantom child crashed.'); | ||
}); | ||
``` | ||
### Options | ||
* **args** *?object*: camel-cased [arguments](http://phantomjs.org/api/command-line.html) to pass to the phantom child. | ||
* **autoRestart** *?boolean* [`false`]: should the phantom child try to restart on crash? | ||
* **bindings** *?string*: path of script to pass to the phantom child so you can communicate with it. | ||
* **data** *?object*: arbitrary parameters to pass to the phantom child and accessible in the bindings. | ||
* **handshakeTimeout** *?integer* [`5000`]: time allowed in milliseconds to perform the handshake with the phantom child. | ||
* **name** *?string*: an optional name to give to the phantom child. | ||
* **path** *?string*: path of a custom `phantomjs` binary. | ||
### Global bothan configuration | ||
```js | ||
var bothan = require('bothan'); | ||
// Changing the default port on which bothan is communicating | ||
bothan.config({port: 5647}); | ||
``` | ||
## Roadmap | ||
* Clusters | ||
* Better messaging | ||
* Better restarts | ||
* Better encapsulation | ||
## Contribution | ||
[![Build Status](https://travis-ci.org/medialab/bothan.svg)](https://travis-ci.org/medialab/bothan) | ||
WIP | ||
Contributions are more than welcome. Feel free to submit any pull request as long as you added unit tests if relevant and passed them all. | ||
To install the development environment, clone your fork and use the following commands: | ||
```bash | ||
# Install dependencies | ||
npm install | ||
# Testing | ||
npm test | ||
``` | ||
## Authors | ||
**bothan.js** is being developed by [Guillaume Plique](https://github.com/Yomguithereal) @ SciencesPo - [médialab](http://www.medialab.sciences-po.fr/fr/). |
@@ -9,2 +9,3 @@ /** | ||
module.exports = { | ||
autoRestart: false, | ||
handshakeTimeout: 5000, | ||
@@ -11,0 +12,0 @@ passphrase: 'Many bothans died to bring us this information.', |
@@ -8,2 +8,10 @@ /** | ||
/**
 * Generates a random identifier shaped like an RFC 4122 version-4 UUID.
 *
 * Every `x` of the template is replaced by a random hexadecimal digit and the
 * single `y` by one of `8`, `9`, `a` or `b`. The digits come from
 * `Math.random()`, so the result is NOT cryptographically strong and
 * collisions, while unlikely, are not ruled out.
 *
 * @return {string} - A 36-character lowercase identifier.
 */
function uuid() {
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {

    // `| 0` truncates the random float to an integer in [0, 15]
    var r = Math.random() * 16 | 0,
        v = c === 'x' ? r : (r & 0x3 | 0x8);

    return v.toString(16);
  });
}
// Is the var a plain object? | ||
@@ -53,2 +61,107 @@ function isPlainObject(v) { | ||
// Delay, in milliseconds, after which an unanswered request fails
var DEFAULT_REQUEST_TIMEOUT = 2000;

/**
 * Sends a message through a socket and waits for the reply carrying the same
 * identifier.
 *
 * Accepted signatures:
 *   request(socket, head, callback)
 *   request(socket, head, body, callback)
 *   request(socket, head, body, params, callback)
 *
 * The request settles exactly once, with the first of these outcomes:
 *   - a message whose `id` matches is received: callback(null, message),
 *     where `message` is the whole parsed message, not only its body;
 *   - the delay expires: callback(new Error('timeout'));
 *   - the socket reports a send error: callback(err);
 *   - the caller cancels: callback(new Error('canceled')).
 *
 * @param  {object}   socket     - Either an emitter-style socket exposing
 *                                 `on`/`removeListener`, or a DOM-style one
 *                                 exposing `addEventListener`/
 *                                 `removeEventListener`. Must expose `send`.
 * @param  {string}   head       - Head of the message.
 * @param  {*}        [body]     - Body of the message.
 * @param  {object}   [params]   - Optional parameters.
 * @param  {number}   [params.timeout] - Delay in milliseconds before failing
 *                                 (defaults to DEFAULT_REQUEST_TIMEOUT).
 * @param  {function} callback   - Node-style callback.
 * @return {object}              - `{id, cancel}`: the identifier of the call
 *                                 and a function cancelling it. Cancelling an
 *                                 already settled request does nothing.
 *
 * @throws {Error} - If no callback is supplied.
 */
function request(socket, head, body, params, callback) {

  // Handling polymorphism
  if (arguments.length < 4) {
    callback = body;

    // The callback must not linger in the body slot of the message
    body = undefined;
    params = {};
  }
  else if (arguments.length < 5) {
    callback = params;
    params = {};
  }

  params = params || {};

  // Safeguard
  if (typeof callback !== 'function')
    throw new Error('bothan.helpers.request: no callback supplied.');

  // Event functions
  // NOTE: both helpers rely on the same test so that a listener is always
  // removed through the API that registered it.
  var emitterStyle = typeof socket.on === 'function';

  function on(l) {
    return emitterStyle ?
      socket.on('message', l) :
      socket.addEventListener('message', l);
  }

  function off(l) {
    return emitterStyle ?
      socket.removeListener('message', l) :
      socket.removeEventListener('message', l);
  }

  // Unique identifier for this call
  var id = uuid();

  // Becomes true as soon as one outcome has been reported to the callback
  var settled = false;

  var timeout;

  // Teardown helper
  function teardown() {
    settled = true;
    off(listener);
    clearTimeout(timeout);
  }

  // Declaring outcomes
  function listener(event) {

    // Emitter-style sockets hand over the raw string, DOM-style ones an event
    var message = JSON.parse(typeof event === 'string' ? event : event.data);

    // Solving
    if (message.id === id) {
      teardown();
      return callback(null, message);
    }
  }

  // Timeout
  timeout = setTimeout(function() {
    teardown();
    return callback(new Error('timeout'));
  }, params.timeout || DEFAULT_REQUEST_TIMEOUT);

  on(listener);

  // Sending message
  var sendArgs = [JSON.stringify({
    id: id,
    head: head,
    body: body
  })];

  // Outside of phantom, a completion callback is handed to `send` so that
  // send errors are reported (presumably phantom's socket does not take one).
  if (!global.phantom)
    sendArgs.push(function(err) {
      if (!err || settled)
        return;

      teardown();
      return callback(err);
    });

  socket.send.apply(socket, sendArgs);

  // Returning handful object
  return {
    id: id,
    cancel: function() {

      // Nothing left to cancel once the request has settled
      if (settled)
        return;

      teardown();
      return callback(new Error('canceled'));
    }
  };
}
/**
 * Replies to a request by sending back a message carrying the identifier of
 * that request.
 *
 * @param  {object} socket - Socket to answer through; must expose `send`.
 * @param  {string} id     - Identifier of the request being answered.
 * @param  {*}      [data] - Body of the reply. When undefined, the serialized
 *                           message has no `body` key at all.
 * @return {*}             - Whatever `socket.send` returns.
 */
function replyTo(socket, id, data) {
  var payload = JSON.stringify({
    id: id,
    body: data
  });

  return socket.send(payload);
}
module.exports = { | ||
@@ -58,3 +171,6 @@ camelToHyphen: camelToHyphen, | ||
range: range, | ||
toCLIArgs: toCLIArgs | ||
replyTo: replyTo, | ||
request: request, | ||
toCLIArgs: toCLIArgs, | ||
uuid: uuid | ||
}; |
@@ -9,8 +9,4 @@ /** | ||
async = require('async'), | ||
cp = require('child_process'), | ||
fs = require('fs'), | ||
util = require('util'), | ||
uuid = require('uuid'), | ||
EventEmitter = require('events').EventEmitter, | ||
phantomjs = require('phantomjs'), | ||
Spy = require('./spy.js'), | ||
helpers = require('../shared/helpers.js'), | ||
@@ -20,93 +16,2 @@ spynet = require('./spynet.js'), | ||
/**
 * Spy class.
 *
 * Event emitter wrapping one phantom child process. The process itself is
 * not spawned here: `phantom` stays null until one is assigned elsewhere.
 *
 * @constructor
 * @param {string} name   - Name of the spy, also used to open its
 *                          conversation on the spynet messenger.
 * @param {array}  args   - Stored as `this.args`.
 * @param {object} params - Stored as `this.params`.
 */
function Spy(name, args, params) {
  var spy = this;

  // Extending an Event Emitter
  EventEmitter.call(spy);

  // Properties
  spy.name = name;
  spy.args = args;
  spy.params = params;
  spy.phantom = null;

  // Binding some of the messenger methods
  spy.messenger = spynet.messenger.conversation(name);

  // Kept on the instance so the very same function can be unregistered later
  spy.processHandle = function() {
    spy.kill();
  };

  // Killing the child process with parent
  process.on('exit', spy.processHandle);
}

util.inherits(Spy, EventEmitter);
// Spy Prototype | ||
/**
 * Spawns the phantom child process and waits for its handshake.
 *
 * The callback is invoked exactly once: with no error when the handshake is
 * received in time, or with an Error('handshake-timeout') after the spy has
 * been killed because the delay (`params.handshakeTimeout`, else
 * `config.handshakeTimeout`) expired. A handshake arriving after the timeout
 * is ignored.
 *
 * Emits `phantom:log`, `phantom:error`, `phantom:close` and `phantom:crash`
 * from the child's stdout, stderr and close event.
 *
 * @param  {function} callback - Node-style callback.
 * @return {Spy}               - Itself, for chaining.
 */
Spy.prototype.start = function(callback) {
  var self = this;

  // True once the callback has been fired, whatever the outcome
  var settled = false;

  // Waiting for handshake
  function handle(data, reply) {

    // The listener outlives a timeout, so a late handshake must not fire the
    // callback a second time.
    if (settled)
      return;

    settled = true;
    clearTimeout(failureTimeout);
    reply({ok: true});
    callback(null);
  }

  var failureTimeout = setTimeout(function() {
    settled = true;
    self.kill();
    callback(new Error('handshake-timeout'));
  }, this.params.handshakeTimeout || config.handshakeTimeout);

  // TODO: kill this listener somewhat? (it is only neutralized on timeout)
  this.messenger.once('handshake', handle);

  // Spawning child process
  this.phantom = cp.execFile(this.params.path || phantomjs.path, this.args);

  // On stdout
  this.phantom.stdout.on('data', function(data) {

    // Dropping the last character of the chunk (presumably a line feed)
    data = data.substring(0, data.length - 1);

    if (/Error:/.test(data))
      self.emit('phantom:error', data);
    else
      self.emit('phantom:log', data);
  });

  // On stderr
  this.phantom.stderr.on('data', function(data) {
    self.emit('phantom:error', data);
  });

  // On close
  this.phantom.once('close', function(code, signal) {
    self.emit('phantom:close', code, signal);

    // Any exit code other than 0 or null counts as a crash
    if (code !== 0 && code !== null)
      self.emit('phantom:crash', code);
  });

  return this;
};
/**
 * Kills the phantom child process and unregisters the process 'exit' hook.
 *
 * Safe to call on a spy whose child process has not been spawned yet
 * (`phantom` is null until then).
 *
 * @param {boolean} [noDrop] - Pass exactly `false` to keep the spy registered
 *                             in the spynet; any other value drops it.
 */
Spy.prototype.kill = function(noDrop) {

  // Removing from spynet
  if (noDrop !== false)
    spynet.dropSpy(this.name);

  // Killing the child process
  process.removeListener('exit', this.processHandle);

  if (this.phantom)
    this.phantom.kill();
};
/**
 * Restarts the spy: kills it without dropping it from the spynet, then
 * starts it again.
 *
 * @param {function} callback - Handed over to `start`.
 */
Spy.prototype.restart = function(callback) {

  // `false` tells kill() to leave the spynet registration untouched
  var dropFromSpynet = false;

  this.kill(dropFromSpynet);
  this.start(callback);
};
// Spawner | ||
module.exports = function(params, callback) { | ||
@@ -148,3 +53,3 @@ if (arguments.length < 2) { | ||
// Giving a name | ||
var name = params.name || 'Spy[' + uuid.v4() + ']'; | ||
var name = params.name || 'Spy[' + helpers.uuid() + ']'; | ||
@@ -151,0 +56,0 @@ // Composing unix command |
@@ -15,3 +15,4 @@ /** | ||
EventEmitter = require('events').EventEmitter, | ||
Messenger = require('estafet'), | ||
util = require('util'), | ||
helpers = require('../shared/helpers.js'), | ||
config = require('../shared/config.js'); | ||
@@ -23,4 +24,6 @@ | ||
// Extending EventEmitter | ||
EventEmitter.call(this); | ||
// Properties | ||
this.ee = new EventEmitter(); | ||
this.name = 'Spynet'; | ||
@@ -30,3 +33,2 @@ this.port = config.port; | ||
this.server = null; | ||
this.messenger = null; | ||
this.running = false; | ||
@@ -38,2 +40,4 @@ this.hanging = false; | ||
util.inherits(Spynet, EventEmitter); | ||
// Prototype | ||
@@ -55,3 +59,3 @@ Spynet.prototype.listen = function(port, errback) { | ||
if (this.hanging) { | ||
return this.ee.once('listened', function(err) { | ||
return this.once('listened', function(err) { | ||
if (!err) | ||
@@ -72,3 +76,3 @@ errback(null); | ||
this.server.once('error', function(err) { | ||
self.ee.emit('listened', err); | ||
self.emit('listened', err); | ||
self.hanging = false; | ||
@@ -91,17 +95,2 @@ self.running = false; | ||
// Building the messenger | ||
self.messenger = new Messenger(self.name, { | ||
emitter: function(data, to) { | ||
var str = JSON.stringify(data); | ||
if (!to) | ||
self.server.broadcast(str); | ||
else | ||
self.spies[to].socket.send(str); | ||
}, | ||
receptor: function(callback) { | ||
self.ee.on('message', callback); | ||
} | ||
}); | ||
// On socket connection | ||
@@ -111,13 +100,11 @@ self.server.on('connection', function(socket) { | ||
// On incoming message | ||
socket.on('message', function(str) { | ||
var data = JSON.parse(str); | ||
socket.once('message', function(msg) { | ||
var parsedMsg = JSON.parse(msg); | ||
// Registering socket | ||
self.addSpy(data.from, socket); | ||
self.ee.emit('message', data); | ||
self.addSpy(parsedMsg.body.from, parsedMsg.id, socket); | ||
}); | ||
}); | ||
self.ee.emit('listened'); | ||
self.emit('listened'); | ||
@@ -128,6 +115,26 @@ errback(null); | ||
Spynet.prototype.addSpy = function(name, socket) { | ||
/**
 * Waits for the handshake of a spy and acknowledges it.
 *
 * Listens once for the `<spy name>:handshake` event. When it fires, the
 * request is answered through the socket registered for that spy and the
 * callback receives this socket. If the event does not fire within the
 * given delay, the listener is removed and the callback receives an
 * Error('handshake-timeout').
 *
 * @param {object}   spy      - Spy to wait for; only its `name` is read.
 * @param {number}   timeout  - Delay in milliseconds.
 * @param {function} callback - Node-style callback receiving the socket.
 */
Spynet.prototype.waitForHandshake = function(spy, timeout, callback) {
  var net = this,
      eventName = spy.name + ':handshake',
      timer;

  function onHandshake(reqId) {
    var socket = net.spies[spy.name].socket;

    clearTimeout(timer);
    helpers.replyTo(socket, reqId);
    callback(null, socket);
  }

  timer = setTimeout(function() {
    net.removeListener(eventName, onHandshake);
    callback(new Error('handshake-timeout'));
  }, timeout);

  this.once(eventName, onHandshake);
};
Spynet.prototype.addSpy = function(name, reqId, socket) { | ||
this.spies[name] = { | ||
socket: socket | ||
}; | ||
this.emit(name + ':handshake', reqId); | ||
return this; | ||
@@ -152,8 +159,4 @@ }; | ||
// Shooting messenger | ||
this.messenger.shoot(); | ||
this.messenger = null; | ||
// Dropping listeners | ||
this.ee.removeAllListeners(); | ||
this.removeAllListeners(); | ||
@@ -160,0 +163,0 @@ // Dropping spies |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Git dependency
Supply chain risk: Contains a dependency which resolves to a remote git URL. Dependencies fetched from git URLs are not immutable and can be used to inject untrusted code or reduce the likelihood of a reproducible install.
Found 1 instance in 1 package
24256
4
2
12
685
189
0
+ Addedevents@^1.0.2
+ Addedbindings@1.2.1(transitive)
+ Addedbufferutil@1.1.0(transitive)
+ Addedevents@1.1.1(transitive)
+ Addednan@1.8.4(transitive)
+ Addedultron@1.0.2(transitive)
+ Addedutf-8-validate@1.1.0(transitive)
+ Addedws@0.7.2(transitive)
- Removedestafet@git+https://github.com/Yomguithereal/estafet.git
- Removedlodash@^2.4.1
- Removeduuid@~2.0.1
- Removedcommander@2.1.0(transitive)
- Removedlodash@2.4.2(transitive)
- Removednan@1.0.0(transitive)
- Removedtinycolor@0.0.1(transitive)
- Removeduuid@2.0.3(transitive)
- Removedws@0.4.32(transitive)
Updatedws@^0.7.0