Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

rssspider

Package Overview
Dependencies
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

rssspider - npm Package Compare versions

Comparing version 1.2.0 to 1.3.0

199

lib/rss.js

@@ -10,11 +10,15 @@ /**

var Promise = require('bluebird'),
FeedParser = require('feedparser'),
_ = require('lodash'),
request = require('request'),
read = require('node-readability'),
iconv = require('iconv-lite'),
es = require('event-stream'),
postOptions = ['title', 'description', 'summary', 'date', 'link',
'guid', 'author', 'comments', 'origlink', 'image', 'source', 'categories', 'enclosures'],
siteInfoOption = ['title', 'description', 'date', 'link', 'xmlurl', 'author', 'favicon', 'copyright', 'generator', 'image'];
FeedParser = require('feedparser'),
_ = require('lodash'),
request = require('request'),
// read = require('node-readability'),
iconv = require('iconv-lite'),
es = require('event-stream'),
postOptions = ['title', 'description', 'summary', 'date', 'link',
'guid', 'author', 'comments', 'origlink', 'image', 'source', 'categories',
'enclosures'
],
siteInfoOption = ['title', 'description', 'date', 'link', 'xmlurl', 'author',
'favicon', 'copyright', 'generator', 'image'
];

@@ -30,24 +34,29 @@ /**

return new Promise(function (resolve, reject) {
var posts,encoding;
var req = request(url, {timeout: 10000, pool: false});
return new Promise(function(resolve, reject) {
var posts, encoding;
var req = request(url, {
timeout: 10000,
pool: false
});
req.setMaxListeners(50);
req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
req.setHeader('user-agent',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'
)
req.setHeader('accept', 'text/html,application/xhtml+xml');
var feedparser = new FeedParser();
req.on('error', reject);
req.on('response', function (res) {
var stream = this;
posts = [];
if (res.statusCode !== 200) {
return this.emit('error', new Error('Bad status code'));
}
req.on('response', function(res) {
var stream = this;
posts = [];
if (res.statusCode !== 200) {
return this.emit('error', new Error('Bad status code'));
}
}).pipe(es.through(function(data) {
//get charset from <?xml version="1.0" encoding="gb2312"?><rss version="2.0">

@@ -59,3 +68,3 @@ //then convert gb2312,gbk,big5 etc to utf-8

var meta = result.match(/<\?(.*?)\?>/g);
if(meta !== null){
if (meta !== null) {
meta = meta[0].toString().match(/encoding="(.*?)"\?>/g);

@@ -66,28 +75,28 @@ encoding = meta.toString().split('"')[1];

//iconv-lite , which can support windows
result = iconv.decode(data,encoding);
result = iconv.decode(data, encoding);
this.emit('data', result);
})).pipe(feedparser);
feedparser.on('error', reject);
feedparser.on('end', function (err) {
if (err) {
reject(err);
}
resolve(posts);
feedparser.on('end', function(err) {
if (err) {
reject(err);
}
resolve(posts);
});
feedparser.on('readable', function () {
while (post = this.read()) {
var post = _.pick(post, options);
posts.push(post);
}
feedparser.on('readable', function() {
while (post = this.read()) {
var post = _.pick(post, options);
posts.push(post);
}
});

@@ -107,12 +116,17 @@ });

options = options || siteInfoOption;
return new Promise(function (resolve, reject) {
return new Promise(function(resolve, reject) {
var rss;
var req = request(url, {timeout: 10000, pool: false});
var req = request(url, {
timeout: 10000,
pool: false
});
req.setMaxListeners(50);
// Some feeds do not respond without user-agent and accept headers.
req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
req.setHeader('user-agent',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'
)
req.setHeader('accept', 'text/html,application/xhtml+xml');
var feedparser = new FeedParser();
req.on('error', reject);
req.on('response', function (res) {
req.on('response', function(res) {
var stream = this;

@@ -123,5 +137,5 @@ if (res.statusCode !== 200) {

//charset = getParams(res.headers['content-type'] || '').charset;
// stream.pipe(feedparser);
// stream.pipe(feedparser);
}).pipe(es.through(function(data) {
//get charset from <?xml version="1.0" encoding="gb2312"?><rss version="2.0">

@@ -133,3 +147,3 @@ //then convert gb2312,gbk,big5 etc to utf-8

var meta = result.match(/<\?(.*?)\?>/g);
if(meta !== null){
if (meta !== null) {
meta = meta[0].toString().match(/encoding="(.*?)"\?>/g);

@@ -140,8 +154,8 @@ encoding = meta.toString().split('"')[1];

//iconv-lite , which can support windows
result = iconv.decode(data,encoding);
result = iconv.decode(data, encoding);
this.emit('data', result);
})).pipe(feedparser);
feedparser.on('error', reject);
feedparser.on('end', function (err) {
feedparser.on('end', function(err) {
if (err) {

@@ -152,7 +166,7 @@ reject(err);

});
feedparser.on('readable', function () {
feedparser.on('readable', function() {
var post;
if (post = this.read()) {
rss = _.pick(post.meta, options);
rss.feedurl = url; //rss 的url
rss.feedurl = url; //rss 的url
resolve(rss);

@@ -163,18 +177,20 @@ }

}
// /**
// * get all post's body content by post list
// * @param posts
// * @returns {*|Promise}
// */
// function fetchAllContent(posts) {
// return Promise.reduce(posts, function(total, post) {
// return getCleanBody(post.link).then(function(article) {
// post.content = article.content ? article.content : post.description ||
// post.summary;
// return post;
// });
// }, []).then(function(total) {
// return posts;
// });
// }
/**
 * Attach body content to every post in a list.
 * @param posts list of posts; each post's `link` is passed to getCleanBody
 * @returns {*|Promise} promise resolving to the same `posts` array once each
 *   entry has been given a `content` property
 */
function fetchAllContent(posts) {
  // Reducer callback: the accumulator is never used, only the current post.
  var attachContent = function (ignored, entry) {
    return getCleanBody(entry.link).then(function (page) {
      if (page.content) {
        entry.content = page.content;
      } else {
        // No extracted body: fall back to the feed's own text fields.
        entry.content = entry.description || entry.summary;
      }
      return entry;
    });
  };

  return Promise.reduce(posts, attachContent, []).then(function () {
    return posts;
  });
}
/**
* get all content and rss post by rssUrl

@@ -184,28 +200,29 @@ * @param url

*/
function getAllByUrl(url) {
  // Load the feed's posts first, then hand the list to fetchAllContent.
  return fetchRss(url).then(fetchAllContent);
}
/**
 * Get the readable body of a page by passing its link to `read`
 * (node-readability).
 * @param link URL of the page to fetch
 * @returns {Promise} resolves with the `article` value handed to the `read`
 *   callback; rejects with the error that callback reports
 */
function getCleanBody(link) {
  return new Promise(function (resolve, reject) {
    read(link, function (err, article) {
      if (err) {
        // Return here so a failed read does not fall through to resolve().
        return reject(err);
      }
      resolve(article);
    });
  });
}
// function getAllByUrl(url) {
// return fetchRss(url).then(function(posts) {
// return fetchAllContent(posts);
// });
// }
// /**
// * get body content by link
// * @param link
// * @returns {Promise}
// */
// function getCleanBody(link) {
// return new Promise(function (resolve, reject) {
// read(link, function (err, article, meta) {
// if (err) {
// reject(err);
// }
// resolve(article);
// });
// });
// }
module.exports = {
fetchRss: fetchRss,
siteInfo: siteInfo,
fetchAllContent: fetchAllContent,
getCleanBody: getCleanBody,
getAllByUrl: getAllByUrl
//fetchAllContent: fetchAllContent,
//getCleanBody: getCleanBody,
//getAllByUrl: getAllByUrl
};
{
"name": "rssspider",
"version": "1.2.0",
"version": "1.3.0",
"author": "Liu Xing <shanelau1021@gmail.com> (http://kissliux.github.io)",

@@ -19,3 +19,3 @@ "description": "The simplest way to use rssspide to fetch rss list and site info. Fetch post'content ,give clean view to you. ",

"dependencies": {
"bluebird": "^2.3.2",
"bluebird": "^2.10.2",
"event-stream": "^3.3.2",

@@ -25,4 +25,2 @@ "feedparser": "^0.19.2",

"lodash": "^2.4.1",
"node-readability": "^1.0.2",
"readability": "^0.1.0",
"request": "^2.45.0"

@@ -29,0 +27,0 @@ },

@@ -91,2 +91,3 @@ # rssSpider

**以下功能在 1.2.0 才能使用, readability 的库支持不是很好**

@@ -156,3 +157,3 @@ ### 3. `getCleanBody(url)`

## updrade
## upgrade
Add node 4.x support

@@ -159,0 +160,0 @@

@@ -23,18 +23,1 @@ /**

};
exports.getAllByUrl = function(test) {
spide.getAllByUrl(url).then(function(data) {
test.ok(data.length > 0, "this assertion should pass");
test.done();
});
};
exports.getPostContent = function(test) {
var url = 'http://www.bigertech.com/post/the-art-of-mfc/';
spide.getCleanBody(url).then(function(data) {
test.ok(data.title != '', "this assertion should pass");
test.done();
}).catch(function(err) {
console.error(err);
});
};
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc