Comparing version 0.2.12 to 0.3.2
@@ -9,58 +9,8 @@ | ||
const program = require('commander') | ||
const path = require('path') | ||
const guvnor = require('guvnor').Local | ||
const env = process.env.NODE_ENV || "development" | ||
const dirApp=process.cwd(), meta = require(path.join(dirApp,"meta.json")), clientName = "client.js", workerName="worker.js"; | ||
let App = null, app=null, entry=null,appPath = path.join(dirApp,"lib"), processOptions = {env:env,name:meta.name}; | ||
program | ||
.version('0.0.3') | ||
.option('-c, --client', 'run as client') | ||
.option('-w, --worker', 'run as worker') | ||
.option('-r, --recover','recover mode, read seed from database') | ||
.option('-i, --init','generate scaffold') | ||
.option('-n, --name<name>','app to run') | ||
.version('0.1.0') | ||
.command('init','generate scaffold') | ||
.command('start [mode]','start in worker or client mode.') | ||
.parse(process.argv); | ||
console.log('run floodesh as:'); | ||
if (program.client) { | ||
console.log(' - client'); | ||
appPath = path.join(appPath,clientName); | ||
processOptions.name += clientName; | ||
} | ||
if (program.worker){ | ||
console.log(' - worker'); | ||
appPath = path.join(appPath,workerName); | ||
processOptions.name += workerName; | ||
} | ||
// app = new App(require(path.join(dirApp,configFile))[env]); | ||
// app.attach(new entry()).start(); | ||
guvnor.connectOrStart(function(error, daemon) { | ||
if(error) throw error; | ||
console.log("guv daemon connected"); | ||
daemon.startProcess(appPath,processOptions,function(error, processInfo){ | ||
if(error){ | ||
console.error(error); | ||
throw error; | ||
} | ||
console.log("process starting..."); | ||
daemon.on('process:ready', function(readyProcessInfo) { | ||
if(processInfo.id == readyProcessInfo.id) { | ||
console.log("process has now started [%d]",readyProcessInfo.pid); | ||
}else{ | ||
console.error("error when starting process"); | ||
} | ||
daemon.disconnect(function (error) { | ||
if(error) throw error; | ||
console.log("guv daemon disconnected"); | ||
}) | ||
}) | ||
}); | ||
}) |
@@ -38,3 +38,3 @@ | ||
logClient.add(winston.transports.File, { filename: './gearman.client.log',logstash:true,level:'info',handleExceptions: false }); | ||
//logClient.add(winston.transports.File, { filename: './gearman.client.log',logstash:true,level:'info',handleExceptions: false }); | ||
@@ -41,0 +41,0 @@ const JOB_END = 'end' |
@@ -72,3 +72,2 @@ | ||
.getter('origin') | ||
//.getter('subdomains') | ||
@@ -75,0 +74,0 @@ .getter('protocol') |
@@ -44,2 +44,3 @@ | ||
this.on("parsed", ctx => { | ||
ctx.performance.parsedTimestamp = Date.now(); | ||
if( ctx.next().value === ctx.PARSED) { | ||
@@ -105,4 +106,5 @@ self.parsedmw.callback( ctx => self.emit("process",ctx), err => {self.emit('error.middleware',err); } )(ctx); | ||
let self = this; | ||
let enqueueTimestamp = Date.now(); | ||
this._scheduler.key(opt.limiter||"default").submit( opt.priority||1 ,done => { | ||
let ctx = self._createContext(opt); | ||
let ctx = self._createContext(opt, enqueueTimestamp); | ||
ctx.done = done; | ||
@@ -123,3 +125,3 @@ if(ctx.next().value === ctx.REQ){ | ||
_createContext(task){ | ||
_createContext(task, enqueueTimestamp){ | ||
const ctx = Context(); | ||
@@ -136,4 +138,12 @@ const rq = ctx.request = Object.create(request); | ||
ctx.tasks = []; | ||
ctx.performance = { | ||
enqueueTimestamp:enqueueTimestamp, | ||
bottleneckTimestamp:Date.now(), | ||
requestTimestamp:null, | ||
responseTimestamp:null, | ||
responsemwTimestamp:null, | ||
parsedTimestamp:null | ||
}; | ||
return ctx; | ||
} | ||
} |
@@ -24,3 +24,5 @@ | ||
let self = this; | ||
ctx.performance.requestTimestamp = Date.now(); | ||
let req = request(ctx.opt, (err,res) => { | ||
ctx.performance.responseTimestamp = Date.now(); | ||
if(err) { | ||
@@ -59,2 +61,3 @@ console.error(err.stack); | ||
let self = this; | ||
ctx.performance.responsemwTimestamp = Date.now(); | ||
ctx.parse.call( this.app,ctx, () => self.emit("parsed",ctx) ); | ||
@@ -124,3 +127,3 @@ } | ||
this._w.close(); | ||
process.exit(0); | ||
//process.exit(0); | ||
} | ||
@@ -127,0 +130,0 @@ |
{ | ||
"name": "floodesh", | ||
"version": "0.2.12", | ||
"version": "0.3.2", | ||
"description": "Floodesh is a distributed web spider/crawler written with Nodejs.", | ||
@@ -5,0 +5,0 @@ "bin": "./bin/floodesh", |
177
README.md
@@ -1,28 +0,177 @@ | ||
# floodesh | ||
Floodesh is middleware based web spider written with Nodejs. Floodesh is word of `flood` + `mesh` | ||
# Floodesh | ||
Floodesh is middleware based web spider written with Nodejs. "Floodesh" is a combination of two words, `flood` and `mesh`. | ||
# Requirement | ||
* [Gearman Server](http://gearman.org/) | ||
* [MongoDB](https://www.mongodb.org/) | ||
## Gearman Server Installation | ||
Make sure `libboost-all-dev`, `gperf`, `libevent-dev` and `uuid-dev` have been installed on your system. | |
wget -O - https://launchpad.net/gearmand/1.2/1.1.12/+download/gearmand-1.1.12.tar.gz | tar zxf - | |
cd gearmand-1.1.12 | ||
./configure | ||
make | ||
make install | ||
# Install | ||
$ npm install floodesh | ||
$ npm install -g floodesh | ||
# Usage | |
Before you use floodesh make sure you have [gearman](http://gearman.org/) server running on localhost | ||
Generate a new app from the templates with a single command. | |
$ mkdir floodesh_demo | ||
$ cd floodesh_demo | ||
$ floodesh --init | ||
$ floodesh init // all necessary files will be generated in your directory. | ||
$ npm install | ||
# Context | ||
A context instance is a kind of [Finite-State Machine](https://en.wikipedia.org/wiki/Finite-state_machine) implemented with `Generators`, an [ECMAScript 6](http://es6-features.org/#GeneratorFunctionIteratorProtocol) feature. Through the context, we can access almost all fields of `response` and `request`, for example: | |
```javascript | ||
worker.responsemw.use( (ctx,next) => { | ||
ctx.content = ctx.body.toString(); // totally do not care about the body | ||
return next(); | ||
}) | ||
``` | ||
First install Gearman | ||
` | ||
wget https://launchpad.net/gearmand/1.2/1.1.12/+download/gearmand-1.1.12.tar.gz | tar zxf | ||
cd gearmand-1.1.12 | ||
./configure | ||
make | ||
make install | ||
` | ||
## Request | ||
You may first install `libboost-all-dev`, `gperf`, `libevent-dev`, `uuid-dev` | ||
### ctx.is(types) | ||
* `types` String|Array | |
* Return: String|false|null | ||
Check if the incoming request contains the "Content-Type" header field, and it contains any of the given mime `types`. If there is no request body, `null` is returned. If there is no content type, `false` is returned. Otherwise, it returns the first `type` that matches. | |
### ctx.querystring | ||
* String | ||
Get querystring. | ||
### ctx.idempotent | ||
* Boolean | ||
Check if the request is idempotent. | ||
### ctx.search | ||
* String | ||
Get the search string. Unlike `querystring`, it includes the leading "?". | |
### ctx.method | ||
* String | ||
Get request method. | ||
### ctx.query | ||
* Object | ||
Get parsed query-string. | ||
### ctx.path | ||
* String | ||
Get the request pathname | ||
### ctx.url | ||
* String | ||
Return request url, the same as __ctx.href__. | ||
### ctx.origin | ||
* String | ||
Get the origin of URL, for instance, "https://www.google.com". | ||
### ctx.protocol | ||
* String | ||
Return the protocol string "http" or "https" | ||
### ctx.host | ||
* String, hostname:port | ||
Parse the "Host" header field host and support X-Forwarded-Host when a proxy is enabled. | ||
### ctx.hostname | ||
* String | ||
Parse the "Host" header field hostname and support X-Forwarded-Host when a proxy is enabled. | ||
### ctx.secure | ||
* Boolean | ||
Check if protocol is https. | ||
## Response | ||
### ctx.status | ||
* Number | ||
Get status code from response. | ||
### ctx.message | ||
* String | ||
Get status message from response. | ||
### ctx.body | ||
* Buffer | ||
Get the response body in Buffer. | ||
### ctx.length | ||
* Number | ||
Get length of response body. | ||
### ctx.type | ||
* String | ||
Get the response mime type, for instance, "text/html" | ||
### ctx.lastModifieds | ||
* Date | ||
Get the Last-Modified date in Date form, if it exists. | ||
### ctx.etag | ||
* String | ||
Get the ETag of a response. | ||
### ctx.header | ||
* Object | ||
Return the response header. | ||
### ctx.href | ||
* String | ||
### ctx.uri | ||
* String | ||
### ctx.contentType | ||
* String | ||
### ctx.get(key) | ||
* `key` String | ||
* Return: String | ||
Get value by key in response headers | ||
# Middlewares | ||
* [mof-cheerio](https://www.npmjs.com/package/mof-cheerio). A simple wrapper of `Cheerio` | ||
* [mof-charsetparser](https://www.npmjs.com/package/mof-charsetparser). Parse `Charset` in response headers | ||
* [mof-iconv](https://www.npmjs.com/package/mof-iconv). Encoding converter middleware using `iconv` or `iconv-lite` | ||
* [mof-reqnormalizer](https://www.npmjs.com/package/mof-normalizer). | ||
* [mof-reqadapter](https://www.npmjs.com/package/mof-reqadapter). | ||
* [mof-uarotate](https://www.npmjs.com/package/mof-uarotate). | ||
* [mof-seenreq](https://www.npmjs.com/package/mof-seenreq). Only makes sense in [flowesh](https://www.npmjs.com/package/flowesh), a simple wrapper of `seenreq` | |
* [mof-validbody](https://www.npmjs.com/package/mof-validbody). | ||
@@ -1,3 +0,18 @@ | ||
exports.module = `"use strict" | ||
` | ||
"use strict" | ||
/** | ||
* Module dependencies. | ||
*/ | ||
const Client = require("floodesh").Client | ||
const env = process.env.NODE_ENV || "development" | ||
const config = require("./config.json")[env] | ||
const App = require('./lib/client.js') | ||
/* | ||
* Attach app to `Client` instance | ||
* | ||
*/ | ||
new Client(config).attach(new App()).start(); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Uses eval
Supply chain riskPackage uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Misc. License Issues
License(Experimental) A package's licensing information has fine-grained problems.
Found 1 instance in 1 package
52733
32
0
1687
178
8
4