Comparing version 0.1.0 to 0.2.0
127
index.js
'use_strict'; | ||
var _ = require('underscore'), | ||
fs = require('fs'); | ||
fs = require('fs'), | ||
_this; | ||
function BigSEO(opts) { | ||
_this = this; | ||
this.TAG = "BigSEO"; | ||
this.opts = { | ||
log: true, | ||
log: process.env.NODE_ENV == 'production' ? false : true, | ||
cache_path: 'caches', | ||
@@ -16,37 +19,82 @@ cache_url: '/save/cache' | ||
this.ua = { | ||
"Ruby": true | ||
"Ruby": true, | ||
'undefined': true, | ||
"facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)": true, | ||
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)": true, | ||
"Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)": true, | ||
"Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)": true, | ||
"msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)": true, | ||
"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)": true, | ||
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)": true, | ||
"Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)": true, | ||
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)": true, | ||
"Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/Robots/2.0; +http://go.mail.ru/help/robots)": true, | ||
"Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)": true, | ||
"www.integromedb.org/Crawler": true, | ||
"Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)": true, | ||
"Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)": true, | ||
"Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)": true, | ||
"Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)": true, | ||
"rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)": true, | ||
"voltron": true, | ||
"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)": true | ||
}; | ||
if(!fs.existsSync(this.opts.cache_path)) { | ||
fs.mkdirSync(this.opts.cache_path); | ||
} | ||
_.extend(this.opts, opts); | ||
fs.exists(this.opts.cache_path, function(exists) { | ||
if(!exists) { | ||
fs.mkdir(_this.opts.cache_path, function(err) { | ||
if(err) { | ||
_this.debug(err); | ||
} | ||
}); | ||
} | ||
}); | ||
}; | ||
BigSEO.prototype.cache = function() { | ||
var _this = this; | ||
var express = require('express'); | ||
var router = express.Router(); | ||
BigSEO.prototype.cache = function(req, res) { | ||
var body = req.body.dom; | ||
var rawUrl = req.body.url; | ||
var url = _this.encodeURL(rawUrl); | ||
router.post(this.opts.cache_url, function(req, res) { | ||
var body = req.body.dom; | ||
var rawUrl = req.body.url; | ||
var url = _this.encodeURL(rawUrl); | ||
_this.debug("Saving cache: " + _this.cachePathFor(url)); | ||
_this.debug("Saving cache: " + _this.cachePathFor(url)); | ||
fs.writeFile(_this.cachePathFor(url), body, function(err) { | ||
if(err) { | ||
console.log(err); | ||
_this.debug("Error saving cache for: " + rawUrl); | ||
res.send(500); | ||
} | ||
else { | ||
_this.debug("New cache for url: " + rawUrl); | ||
res.send(200); | ||
} | ||
}); | ||
}; | ||
fs.writeFile(_this.cachePathFor(url), body, function(err) { | ||
if(err) { | ||
console.log(err); | ||
_this.debug("Error saving cache for: " + rawUrl); | ||
res.send(500); | ||
BigSEO.prototype.middleware = function(req, res, next) { | ||
var ua = req.headers['user-agent']; | ||
_this.debug("UA: " + ua); | ||
var url = req.protocol + "://" + req.headers.host + req.originalUrl; | ||
if (req.method == "GET" && _this.matchUA(ua)) { | ||
_this.debug("Verifying if has cache for: " + url); | ||
_this.hasCacheFor(url, function(hasCache) { | ||
if(hasCache) { | ||
_this.debug('Cache Hit for ' + url); | ||
_this.getCacheContentFor(url, function(data) { | ||
res.send(data); | ||
}); | ||
} | ||
else { | ||
_this.debug("New cache for url: " + rawUrl); | ||
res.send(200); | ||
_this.debug('Cache Miss for ' + url); | ||
next(); | ||
} | ||
}); | ||
}); | ||
return router; | ||
} else { | ||
_this.debug('Cache Miss for ' + url); | ||
next(); | ||
} | ||
}; | ||
@@ -57,20 +105,9 @@ | ||
var currentDir = __dirname; | ||
var _this = this; | ||
var express = require('express'); | ||
var router = express.Router(); | ||
router.use(function(req, res, next) { | ||
var ua = req.headers['user-agent']; | ||
_this.debug("UA: " + ua); | ||
router.use(this.middleware); | ||
router.post(this.opts.cache_url, this.cache); | ||
var url = req.protocol + "://" + req.headers.host + req.originalUrl; | ||
if (req.method == "GET" && _this.matchUA(ua) && _this.hasCacheFor(url)) { | ||
_this.debug('Cache Hit for ' + url); | ||
res.status(200).send(_this.getCacheContentFor(url)); | ||
} else { | ||
_this.debug('Cache Miss for ' + url); | ||
next(); | ||
} | ||
}); | ||
router.get('/bigseo/bigseo.js', function(req, res) { | ||
@@ -90,4 +127,6 @@ fs.readFile(currentDir + '/static/bigseo.js', function(err, data) { | ||
BigSEO.prototype.getCacheContentFor = function(url) { | ||
return fs.readFileSync(this.cachePathFor(url)); | ||
/**
 * Reads the cached page stored for `url` and hands its content to `cb`.
 *
 * @param {string} url - Page URL; it is passed through encodeURL and then
 *   cachePathFor to locate the cache file.
 * @param {function} cb - Called as cb(content) with the file content decoded
 *   as UTF-8. If the file cannot be read, called as cb(null, err) instead.
 */
BigSEO.prototype.getCacheContentFor = function(url, cb) {
    fs.readFile(this.cachePathFor(this.encodeURL(url)), function(err, data) {
        if (err) {
            // data is undefined on failure; calling toString on it would throw
            // inside the async callback and bring the whole process down.
            cb(null, err);
            return;
        }
        cb(data.toString('utf8'));
    });
};
@@ -101,4 +140,4 @@ | ||
BigSEO.prototype.hasCacheFor = function(url) { | ||
return fs.existsSync(this.cachePathFor(url)); | ||
/**
 * Checks asynchronously whether a cache file exists for `url`.
 *
 * @param {string} url - Page URL; it is passed through encodeURL and then
 *   cachePathFor to build the cache file path.
 * @param {function} cb - Called as cb(true) when the cache file is
 *   accessible and cb(false) otherwise.
 */
BigSEO.prototype.hasCacheFor = function(url, cb) {
    var cacheFile = this.cachePathFor(this.encodeURL(url));
    try {
        // fs.exists is deprecated in Node.js; fs.access gives the same answer
        // through a conventional error-first callback.
        fs.access(cacheFile, function(err) {
            cb(!err);
        });
    } catch (e) {
        // fs.access throws synchronously on an invalid path argument;
        // report that as "no cache", as fs.exists did.
        cb(false);
    }
};
@@ -105,0 +144,0 @@ |
{ | ||
"name": "bigseo", | ||
"version": "0.1.0", | ||
"description": "BigSEO is a ExpresJS module built for apps who need a SEO Engine exclusively for web crawlers such as Google, Bing, Facebook, etc.", | ||
"keywords": "seo, engine, express, cache, crawler, bigseo", | ||
"main": "index.js", | ||
"author": "Rafael Grillo Abreu <grillorafael@gmail.com> (http://rgrillo.com/)", | ||
"license": "MIT", | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/grillorafael/bigseo" | ||
}, | ||
"dependencies": { | ||
"underscore": "^1.6.0" | ||
} | ||
"name": "bigseo", | ||
"version": "0.2.0", | ||
"description": "BigSEO is a ExpresJS module built for apps who need a SEO Engine exclusively for web crawlers such as Google, Bing, Facebook, etc.", | ||
"keywords": "seo, engine, express, cache, crawler, bigseo", | ||
"main": "index.js", | ||
"author": "Rafael Grillo Abreu <grillorafael@gmail.com> (http://rgrillo.com/)", | ||
"license": "MIT", | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/grillorafael/bigseo" | ||
}, | ||
"dependencies": { | ||
"underscore": "^1.6.0" | ||
}, | ||
"devDependencies": { | ||
"express": "^4.7.2" | ||
} | ||
} |
@@ -1,9 +0,23 @@ | ||
# BigSEO (WIP) | ||
# BigSEO | ||
BigSEO is an ExpressJS module built for apps that need an SEO engine exclusively for web crawlers such as Google, Bing, Facebook, etc. | ||
It is simple to attach to your pre existing ExpressJS application. | ||
BigSEO is a simple middleware for expressjs to handle crawler requests. | ||
```npm install bigseo --save``` | ||
Things you can make crawlers see if you use BigSEO: | ||
1. AngularJS rendered pages | ||
1. Disqus comments | ||
1. Facebook comments | ||
1. Javascript dom modifications | ||
1. etc | ||
Things you can do if you use BigSEO: | ||
1. Add crawlers meta tags via javascript | ||
1. Load your content through AJAX | ||
1. Write AngularJS applications with no worries about SEO | ||
1. etc | ||
###TODO: | ||
@@ -13,16 +27,16 @@ 1. List robots user agents | ||
1. Work without jquery | ||
1. Other language compatibility | ||
## ExpressJS 4.x | ||
It is very simple to use BigSEO. Under your server application, insert this code snippet right before your route definitions. From now on, we will route every crawler request directly to an existing cache. If there is no cache, we will proceed with the request to the default response. | ||
```javascript | ||
var bigSeo = require('bigseo')(); | ||
. | ||
. | ||
. | ||
// Your application config | ||
app.use(bigSeo.run()); | ||
app.use('/save/cache', bigSeo.cache()); // You can change this route if you want | ||
// Your application routes | ||
``` | ||
BigSEO has a few configurable parameters that you can pass to the constructor | ||
BigSEO has a few optional configurations that you can pass to the constructor. | ||
@@ -38,2 +52,4 @@ ```javascript | ||
Ex: ```var bigSeo = require('bigseo')({log: false});``` | ||
When you start your Express application, BigSEO will by default create a ```caches/``` directory where it will save the cached content. | ||
@@ -43,9 +59,6 @@ | ||
## Browser and Saving Cache | ||
## Saving your cache | ||
BigSEO also exposes to the browser a BigSEO function under ```/bigseo/bigseo.js```. So you can import this on your layout file | ||
Now, in order to build your cache, just put this tag on the HTML page you want to be activated. | ||
```jade | ||
script(src='/bigseo/bigseo.js') | ||
``` | ||
```html | ||
@@ -55,3 +68,3 @@ <script src='/bigseo/bigseo.js'></script> | ||
The client follows these steps: | ||
And run the save method when you think the DOM is ready to be saved. | ||
```javascript | ||
@@ -72,2 +85,34 @@ var bigSeo = new BigSEO(); | ||
**REMEMBER: IF YOU CHANGE THE SAVE PATH ON YOUR EXPRESS APPLICATION, YOU ALSO HAVE TO CHANGE IT IN THE CLIENT'S SETTINGS** | ||
Ex: ```var bigSeo = new BigSEO({url: '/cache'});``` | ||
**Warning: If you change the save path on your Express application, you also have to change it in the client settings** | ||
## user-agents reference | ||
The user agents reference can be found [here](http://user-agent-string.info/list-of-ua/bots). | ||
You can find the list of implemented user agents [here](https://github.com/grillorafael/bigseo/blob/master/index.js). | ||
Please **contribute** by adding more relevant items to the list! | ||
## LICENSE | ||
The MIT License (MIT) | ||
Copyright (c) [2014] [Rafael Grillo Abreu] | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
10130
160
114
1
2