Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

bothan

Package Overview
Dependencies
Maintainers
1
Versions
2
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bothan - npm Package Compare versions

Comparing version 0.1.0 to 0.2.0

phantom/polyfills.js

2

index.js

@@ -38,5 +38,5 @@ /**

Object.defineProperty(bothan, 'version', {
value: '0.1.0'
value: '0.2.0'
});
module.exports = bothan;
{
"name": "bothan",
"version": "0.1.0",
"description": "A node.js phantom controller for scraping purposes.",
"version": "0.2.0",
"description": "A node.js phantom interface for scraping purposes.",
"main": "index.js",
"scripts": {
"test": "gulp test"
"test": "mocha -R spec ./test/endpoint.js",
"lint": "jshint src && jshint test && jshint shared && jshint phantom"
},

@@ -27,14 +28,10 @@ "repository": {

"async": "^0.9.0",
"estafet": "git+https://github.com/Yomguithereal/estafet.git",
"lodash": "^2.4.1",
"events": "^1.0.2",
"phantomjs": "~1.9.10",
"uuid": "~2.0.1",
"ws": "~0.4.32"
"ws": "^0.7.0"
},
"devDependencies": {
"gulp": "^3.8.9",
"gulp-jshint": "^1.8.6",
"gulp-mocha": "^1.1.1",
"mocha": "~1.21.4"
"jshint": "^2.6.0",
"mocha": "~2.1.0"
}
}

@@ -8,5 +8,5 @@ /**

*/
var Messenger = require('estafet'),
helpers = require('../shared/helpers.js'),
config = require('../shared/config.js');
var helpers = require('../shared/helpers.js'),
config = require('../shared/config.js'),
EventEmitter = require('events').EventEmitter;

@@ -27,6 +27,6 @@ // Helpers

this.connected = false;
this.name = null;
this.ee = new EventEmitter();
// Methods
// TODO: implement a timeout
// TODO: implement a phantom-side timeout
this.setup = function(params, next) {

@@ -42,23 +42,35 @@ params = params || {};

// Creating messenger
self.messenger = new Messenger(params.name, {
receptor: function(callback) {
self.ws.onmessage = function(msg) {
callback(JSON.parse(msg.data));
};
},
emitter: function(data) {
self.ws.send(JSON.stringify(data));
}
// Event delegation
function delegate(name) {
return self.ee[name].bind(self.ee);
}
self.ws.addEventListener('message', function(msg) {
var parsedMsg = JSON.parse(msg.data);
self.ee.emit(parsedMsg.head, parsedMsg);
});
self.parent = self.messenger.conversation('Spynet');
// Constructing parent abstraction
self.parent = {
request: helpers.request.bind(null, self.ws),
send: function(head, body) {
self.ws.send(JSON.stringify({
from: params.name,
head: head,
body: body
}));
},
replyTo: helpers.replyTo.bind(null, self.ws),
on: delegate('on'),
once: delegate('once'),
removeListener: delegate('removeListener')
};
// Perform tricks here
// NOTE: executing binding here to avoid racing conditions
if (params.bindings)
require(params.bindings)(self.parent, params.data);
// Performing handshake
self.parent.request('handshake', next);
// Handshake
self.parent.request('handshake', {from: params.name}, function(err) {
if (err)
return next(err);
else
return next(null, self.parent);
});
};

@@ -65,0 +77,0 @@ };

@@ -12,2 +12,6 @@ /**

// Injecting polyfills
if (phantom.version.major < 2)
require('./polyfills.js');
// Checking CLI arguments so we get proper configuration from parent

@@ -25,3 +29,3 @@ var params = JSON.parse(args[1]);

// Setup the comlink
comlink.setup(params, function(err) {
comlink.setup(params, function(err, parentObject) {

@@ -31,2 +35,6 @@ // If the socket server timed out, we exit

return phantom.exit(1);
// Executing bindings
if (params.bindings)
require(params.bindings)(parentObject, params.data);
});
# Bothan
**bothan.js** is a low-level [phantomjs](http://phantomjs.org/) controller that can be used with node.js and was initially intended to perform scraping tasks.
This controller is used by [sandcrawler](https://github.com/medialab/sandcrawler) to perform its dynamic scraping tasks.
## Installation
You can install **bothan.js** with npm. Note that by default, the library will install a correct version of phantomjs thanks to this [package](https://www.npmjs.com/package/phantomjs).
```bash
npm install bothan
```
Or if you need the latest development version:
```bash
npm install git+https://github.com/medialab/bothan.git
```
## Concept
**bothan.js** communicates with its phantom child processes through a websocket server.
It does so without needing to access a dummy webpage on the phantom side, since phantom's main JavaScript context is perfectly able to handle websockets.
This dramatically enhances the stability of the communication between node and its phantom children.
## Bindings
However, bothan only provides a simple way to spawn phantoms and to communicate with them. So, if you want to send messages to your phantoms and have them react accordingly, you must pass bindings to them.
Bindings are just expressed in a script written thusly:
```js
module.exports = function(parent, params) {
// Hello
parent.on('hello', function() {
console.log('Hello world!');
});
};
```
## Usage
### Deploying a phantom
```js
var bothan = require('bothan');
bothan.deploy(function(err, phantom) {
phantom.send('message', {hello: 'world'});
});
// With parameters
bothan.deploy({path: './bin/customphantomjs'}, function(err, phantom) {
//...
});
```
### Methods
#### Send
Sends a message to the phantom child to receive.
```js
phantom.send(head, body);
```
#### Request
Request something from the phantom child.
```js
phantom.request(head, body, params, function(err, response) {
// Deal with error
if (err)
// ...
// Handle response
console.log(response);
});
// Alternate signatures
phantom.request(head, callback);
phantom.request(head, body, callback);
// Cancel a request
var call = phantom.request(...);
call.cancel();
```
*Parameters*:
* **timeout** *?integer* [`2000`]: time in milliseconds before the request times out.
#### ReplyTo
Reply to one side's request.
```js
phantom.replyTo(id, data);
```
#### Kill
Kill a phantom child.
```js
phantom.kill();
```
#### Restart
Restarting a phantom child.
```js
phantom.restart();
```
### Events
Phantom children wrappers as offered by **bothan.js** are event emitters so you can listen to various events.
*Events*
* **ready**: fires when the phantom child is ready or ready again (specially after a restart).
* **log**: fires when the phantom child logs something to stdout.
* **error**: fires when the phantom child prints an error or outputs to stderr.
* **close**: fires when the phantom child closes.
* **crash**: fires when the phantom child crashes.
* **?anyMessage?**: fires when the phantom child emits a message through its web socket.
*Example*
Note that event emitting is done through node's core events [module](http://nodejs.org/api/events.html).
```js
phantom.on('crash', function() {
console.log('Phantom child crashed.');
});
```
### Options
* **args** *?object*: camel-cased [arguments](http://phantomjs.org/api/command-line.html) to pass to the phantom child.
* **autoRestart** *?boolean* [`false`]: should the phantom child try to restart on crash?
* **bindings** *?string*: path of script to pass to the phantom child so you can communicate with it.
* **data** *?object*: arbitrary parameters to pass to the phantom child and accessible in the bindings.
* **handshakeTimeout** *?integer* [`5000`]: time allowed in milliseconds to perform the handshake with the phantom child.
* **name** *?string*: an optional name to give to the phantom child.
* **path** *?string*: path of a custom `phantomjs` binary.
### Global bothan configuration
```js
var bothan = require('bothan');
// Changing the default port on which bothan is communicating
bothan.config({port: 5647});
```
## Roadmap
* Clusters
* Better messaging
* Better restarts
* Better encapsulation
## Contribution
[![Build Status](https://travis-ci.org/medialab/bothan.svg)](https://travis-ci.org/medialab/bothan)
WIP
Contributions are more than welcome. Feel free to submit any pull request as long as you added unit tests if relevant and passed them all.
To install the development environment, clone your fork and use the following commands:
```bash
# Install dependencies
npm install
# Testing
npm test
```
## Authors
**bothan.js** is being developed by [Guillaume Plique](https://github.com/Yomguithereal) @ SciencesPo - [médialab](http://www.medialab.sciences-po.fr/fr/).

@@ -9,2 +9,3 @@ /**

module.exports = {
autoRestart: false,
handshakeTimeout: 5000,

@@ -11,0 +12,0 @@ passphrase: 'Many bothans died to bring us this information.',

@@ -8,2 +8,10 @@ /**

/**
 * Build a random identifier shaped like a version-4 UUID.
 *
 * Every `x` of the template becomes a random hex digit and the `y` becomes
 * one of 8, 9, a or b. Randomness comes from Math.random, so the result is
 * not cryptographically strong.
 *
 * @return {string} A 36-character identifier, e.g. "1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed".
 */
function uuid() {
  var template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx';

  return template.replace(/[xy]/g, function(placeholder) {
    var nibble = Math.random() * 16 | 0;

    // The `y` slot carries the variant bits: force the digit into 8..b
    if (placeholder !== 'x')
      nibble = (nibble & 0x3) | 0x8;

    return nibble.toString(16);
  });
}
// Is the var a plain object?

@@ -53,2 +61,107 @@ function isPlainObject(v) {

// Time in milliseconds a request waits for its reply when no timeout is given
var DEFAULT_REQUEST_TIMEOUT = 2000;

/**
 * Send a request over a socket and wait for the reply carrying the same id.
 *
 * The callback is invoked exactly once: with the reply message, or with an
 * error on timeout ('timeout'), on cancellation ('canceled') or when the
 * socket reports a send failure. Once the request is settled, any later
 * outcome (late reply, late send error, extra cancel() call) is ignored.
 *
 * Signatures:
 *   request(socket, head, callback)
 *   request(socket, head, body, callback)
 *   request(socket, head, body, params, callback)
 *
 * @param  {object}   socket   - Either an emitter-style socket (on /
 *                               removeListener) or a DOM-style one
 *                               (addEventListener / removeEventListener).
 * @param  {string}   head     - Title of the request.
 * @param  {*}        [body]   - JSON-serializable payload.
 * @param  {object}   [params] - Options; `timeout` is the time in
 *                               milliseconds before giving up (a falsy value
 *                               falls back to DEFAULT_REQUEST_TIMEOUT).
 * @param  {function} callback - Called as callback(err) or callback(null, message).
 * @return {object}            - {id, cancel}: the request id and a function
 *                               aborting the request.
 * @throws {Error}             - If no callback function is supplied.
 */
function request(socket, head, body, params, callback) {

  // Handling polymorphism
  if (arguments.length < 4) {
    callback = body;

    // The third argument was the callback, there is no payload to send
    body = undefined;
    params = {};
  }
  else if (arguments.length < 5) {
    callback = params;
    params = {};
  }

  params = params || {};

  // Safeguard
  if (typeof callback !== 'function')
    throw Error('bothan.helpers.request: no callback supplied.');

  // Subscribing and unsubscribing must go through the same listener API,
  // so the choice is made once for both operations
  var emitterStyle = typeof socket.on === 'function';

  function subscribe(fn) {
    return emitterStyle ?
      socket.on('message', fn) :
      socket.addEventListener('message', fn);
  }

  function unsubscribe(fn) {
    return emitterStyle ?
      socket.removeListener('message', fn) :
      socket.removeEventListener('message', fn);
  }

  // Unique identifier for this call
  var id = uuid();

  // Settling helper: releases resources and fires the callback only once
  var settled = false,
      timeout;

  function settle(err, message) {
    if (settled)
      return;
    settled = true;

    unsubscribe(listener);
    clearTimeout(timeout);

    if (err)
      return callback(err);
    return callback(null, message);
  }

  // Timeout
  timeout = setTimeout(function() {
    settle(new Error('timeout'));
  }, params.timeout || DEFAULT_REQUEST_TIMEOUT);

  // Incoming messages: emitter-style sockets hand over the raw string,
  // DOM-style ones an event holding it in `data`
  function listener(event) {
    var message = JSON.parse(typeof event === 'string' ? event : event.data);

    // Only the message answering this very request solves it
    if (message && message.id === id)
      return settle(null, message);
  }

  subscribe(listener);

  // Sending message
  var sendArgs = [JSON.stringify({
    id: id,
    head: head,
    body: body
  })];

  // Outside of phantom, send is also given a callback reporting failures
  if (!global.phantom)
    sendArgs.push(function(err) {
      if (!err)
        return;
      return settle(err);
    });

  socket.send.apply(socket, sendArgs);

  // Returning handful object
  return {
    id: id,
    cancel: function() {
      return settle(new Error('canceled'));
    }
  };
}
/**
 * Answer a request previously received through the socket.
 *
 * @param  {object} socket - Socket exposing a `send` method.
 * @param  {string} id     - Identifier of the request being answered.
 * @param  {*}      data   - JSON-serializable payload, sent as `body`.
 * @return {*}             - Whatever the socket's `send` returns.
 */
function replyTo(socket, id, data) {
  var payload = JSON.stringify({id: id, body: data});

  return socket.send(payload);
}
module.exports = {

@@ -58,3 +171,6 @@ camelToHyphen: camelToHyphen,

range: range,
toCLIArgs: toCLIArgs
replyTo: replyTo,
request: request,
toCLIArgs: toCLIArgs,
uuid: uuid
};

@@ -9,8 +9,4 @@ /**

async = require('async'),
cp = require('child_process'),
fs = require('fs'),
util = require('util'),
uuid = require('uuid'),
EventEmitter = require('events').EventEmitter,
phantomjs = require('phantomjs'),
Spy = require('./spy.js'),
helpers = require('../shared/helpers.js'),

@@ -20,93 +16,2 @@ spynet = require('./spynet.js'),

// Spy class
// Wraps one phantom child process: keeps its name, CLI arguments and
// parameters, and re-emits the child's output and lifecycle as `phantom:*`
// events (`phantom:log`, `phantom:error`, `phantom:close`, `phantom:crash`).
function Spy(name, args, params) {
var self = this;
// Extending an Event Emitter
EventEmitter.call(this);
// Properties
this.name = name;
this.args = args;
this.params = params;
// Child process handle; stays null until start() spawns it
this.phantom = null;
// Binding some of the messenger methods
this.messenger = spynet.messenger.conversation(name);
// Stored on the instance so that kill() can unregister this exact handler
this.processHandle = function() {
self.kill();
};
// Killing the child process with parent
process.on('exit', this.processHandle);
}
util.inherits(Spy, EventEmitter);
// Spy Prototype
// Spawns the phantom child and waits for its handshake.
// `callback` is called with null once the handshake is received, or with
// Error('handshake-timeout') when the failure timer fires first.
// Returns the spy itself.
Spy.prototype.start = function(callback) {
var self = this;
// Waiting for handshake
// On handshake: disarm the failure timer, acknowledge, report success
function handle(data, reply) {
clearTimeout(failureTimeout);
reply({ok: true});
callback(null);
}
// Failure timer: kills the spy and reports the timeout. The delay is
// params.handshakeTimeout, falling back to config.handshakeTimeout when falsy
var failureTimeout = setTimeout(function() {
self.kill();
callback(new Error('handshake-timeout'));
}, this.params.handshakeTimeout || config.handshakeTimeout);
// TODO: kill this listener somewhat?
// NOTE(review): the listener is not removed when the timer fires, so a
// handshake arriving afterwards would seemingly call `callback` again - confirm
this.messenger.once('handshake', handle);
// Spawning child process
// Uses params.path when given, else the binary path exposed by `phantomjs`
this.phantom = cp.execFile(this.params.path || phantomjs.path, this.args);
// On stdout
this.phantom.stdout.on('data', function(data) {
// Drops the last character - presumably the trailing newline, TODO confirm
data = data.substring(0, data.length - 1);
// search() returns -1 when absent and ~-1 is 0: truthy only when 'Error:' is found
if (~data.search(/Error:/))
self.emit('phantom:error', data);
else
self.emit('phantom:log', data);
});
// On stderr
this.phantom.stderr.on('data', function(data) {
self.emit('phantom:error', data);
});
// On close
this.phantom.once('close', function(code, signal) {
self.emit('phantom:close', code, signal);
// A crash is a close with an exit code that is neither 0 nor null
if (code !== 0 && code !== null)
self.emit('phantom:crash', code);
});
return this;
};
// Kills the child process and unregisters the process 'exit' handler.
// The spy is also dropped from spynet unless `noDrop` is strictly `false`
// (restart() passes false).
// NOTE(review): this.phantom is null until start() has run, so calling kill()
// before start() would throw - confirm callers never do.
Spy.prototype.kill = function(noDrop) {
// Removing from spynet
if (noDrop !== false)
spynet.dropSpy(this.name);
// Killing the child process
process.removeListener('exit', this.processHandle);
this.phantom.kill();
};
// Kills the child without dropping the spy from spynet, then starts it again;
// `callback` is forwarded to start().
Spy.prototype.restart = function(callback) {
this.kill(false);
this.start(callback);
};
// Spawner
module.exports = function(params, callback) {

@@ -148,3 +53,3 @@ if (arguments.length < 2) {

// Giving a name
var name = params.name || 'Spy[' + uuid.v4() + ']';
var name = params.name || 'Spy[' + helpers.uuid() + ']';

@@ -151,0 +56,0 @@ // Composing unix command

@@ -15,3 +15,4 @@ /**

EventEmitter = require('events').EventEmitter,
Messenger = require('estafet'),
util = require('util'),
helpers = require('../shared/helpers.js'),
config = require('../shared/config.js');

@@ -23,4 +24,6 @@

// Extending EventEmitter
EventEmitter.call(this);
// Properties
this.ee = new EventEmitter();
this.name = 'Spynet';

@@ -30,3 +33,2 @@ this.port = config.port;

this.server = null;
this.messenger = null;
this.running = false;

@@ -38,2 +40,4 @@ this.hanging = false;

util.inherits(Spynet, EventEmitter);
// Prototype

@@ -55,3 +59,3 @@ Spynet.prototype.listen = function(port, errback) {

if (this.hanging) {
return this.ee.once('listened', function(err) {
return this.once('listened', function(err) {
if (!err)

@@ -72,3 +76,3 @@ errback(null);

this.server.once('error', function(err) {
self.ee.emit('listened', err);
self.emit('listened', err);
self.hanging = false;

@@ -91,17 +95,2 @@ self.running = false;

// Building the messenger
self.messenger = new Messenger(self.name, {
emitter: function(data, to) {
var str = JSON.stringify(data);
if (!to)
self.server.broadcast(str);
else
self.spies[to].socket.send(str);
},
receptor: function(callback) {
self.ee.on('message', callback);
}
});
// On socket connection

@@ -111,13 +100,11 @@ self.server.on('connection', function(socket) {

// On incoming message
socket.on('message', function(str) {
var data = JSON.parse(str);
socket.once('message', function(msg) {
var parsedMsg = JSON.parse(msg);
// Registering socket
self.addSpy(data.from, socket);
self.ee.emit('message', data);
self.addSpy(parsedMsg.body.from, parsedMsg.id, socket);
});
});
self.ee.emit('listened');
self.emit('listened');

@@ -128,6 +115,26 @@ errback(null);

Spynet.prototype.addSpy = function(name, socket) {
/**
 * Wait for the given spy to perform its handshake.
 *
 * Listens once for the `<spy name>:handshake` event. When it fires, the
 * spy's registered socket is looked up, the handshake request is answered
 * and the socket is handed to the callback. If the event does not fire
 * within `timeout` milliseconds, the listener is removed and the callback
 * receives an Error('handshake-timeout').
 *
 * @param {object}   spy      - Spy whose `name` identifies the handshake event.
 * @param {number}   timeout  - Time allowed in milliseconds.
 * @param {function} callback - Called as callback(err) or callback(null, socket).
 */
Spynet.prototype.waitForHandshake = function(spy, timeout, callback) {
  var self = this,
      eventName = spy.name + ':handshake',
      timer;

  // Handshake received: disarm the timer and acknowledge the request
  function onHandshake(reqId) {
    var socket = self.spies[spy.name].socket;

    clearTimeout(timer);
    helpers.replyTo(socket, reqId);
    callback(null, socket);
  }

  // Giving up after the allowed time
  timer = setTimeout(function() {
    self.removeListener(eventName, onHandshake);
    callback(new Error('handshake-timeout'));
  }, timeout);

  this.once(eventName, onHandshake);
};
Spynet.prototype.addSpy = function(name, reqId, socket) {
this.spies[name] = {
socket: socket
};
this.emit(name + ':handshake', reqId);
return this;

@@ -152,8 +159,4 @@ };

// Shooting messenger
this.messenger.shoot();
this.messenger = null;
// Dropping listeners
this.ee.removeAllListeners();
this.removeAllListeners();

@@ -160,0 +163,0 @@ // Dropping spies

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc