Comparing version 1.2.0 to 1.3.0
199
lib/rss.js
@@ -10,11 +10,15 @@ /** | ||
var Promise = require('bluebird'), | ||
FeedParser = require('feedparser'), | ||
_ = require('lodash'), | ||
request = require('request'), | ||
read = require('node-readability'), | ||
iconv = require('iconv-lite'), | ||
es = require('event-stream'), | ||
postOptions = ['title', 'description', 'summary', 'date', 'link', | ||
'guid', 'author', 'comments', 'origlink', 'image', 'source', 'categories', 'enclosures'], | ||
siteInfoOption = ['title', 'description', 'date', 'link', 'xmlurl', 'author', 'favicon', 'copyright', 'generator', 'image']; | ||
FeedParser = require('feedparser'), | ||
_ = require('lodash'), | ||
request = require('request'), | ||
// read = require('node-readability'), | ||
iconv = require('iconv-lite'), | ||
es = require('event-stream'), | ||
postOptions = ['title', 'description', 'summary', 'date', 'link', | ||
'guid', 'author', 'comments', 'origlink', 'image', 'source', 'categories', | ||
'enclosures' | ||
], | ||
siteInfoOption = ['title', 'description', 'date', 'link', 'xmlurl', 'author', | ||
'favicon', 'copyright', 'generator', 'image' | ||
]; | ||
@@ -30,24 +34,29 @@ /** | ||
return new Promise(function (resolve, reject) { | ||
var posts,encoding; | ||
var req = request(url, {timeout: 10000, pool: false}); | ||
return new Promise(function(resolve, reject) { | ||
var posts, encoding; | ||
var req = request(url, { | ||
timeout: 10000, | ||
pool: false | ||
}); | ||
req.setMaxListeners(50); | ||
req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36') | ||
req.setHeader('user-agent', | ||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36' | ||
) | ||
req.setHeader('accept', 'text/html,application/xhtml+xml'); | ||
var feedparser = new FeedParser(); | ||
req.on('error', reject); | ||
req.on('response', function (res) { | ||
var stream = this; | ||
posts = []; | ||
if (res.statusCode !== 200) { | ||
return this.emit('error', new Error('Bad status code')); | ||
} | ||
req.on('response', function(res) { | ||
var stream = this; | ||
posts = []; | ||
if (res.statusCode !== 200) { | ||
return this.emit('error', new Error('Bad status code')); | ||
} | ||
}).pipe(es.through(function(data) { | ||
//get charset from <?xml version="1.0" encoding="gb2312"?><rss version="2.0"> | ||
@@ -59,3 +68,3 @@ //then convert gb2312,gbk,big5 etc to utf-8 | ||
var meta = result.match(/<\?(.*?)\?>/g); | ||
if(meta !== null){ | ||
if (meta !== null) { | ||
meta = meta[0].toString().match(/encoding="(.*?)"\?>/g); | ||
@@ -66,28 +75,28 @@ encoding = meta.toString().split('"')[1]; | ||
//iconv-lite , which can support windows | ||
result = iconv.decode(data,encoding); | ||
result = iconv.decode(data, encoding); | ||
this.emit('data', result); | ||
})).pipe(feedparser); | ||
feedparser.on('error', reject); | ||
feedparser.on('end', function (err) { | ||
if (err) { | ||
reject(err); | ||
} | ||
resolve(posts); | ||
feedparser.on('end', function(err) { | ||
if (err) { | ||
reject(err); | ||
} | ||
resolve(posts); | ||
}); | ||
feedparser.on('readable', function () { | ||
while (post = this.read()) { | ||
var post = _.pick(post, options); | ||
posts.push(post); | ||
} | ||
feedparser.on('readable', function() { | ||
while (post = this.read()) { | ||
var post = _.pick(post, options); | ||
posts.push(post); | ||
} | ||
}); | ||
@@ -107,12 +116,17 @@ }); | ||
options = options || siteInfoOption; | ||
return new Promise(function (resolve, reject) { | ||
return new Promise(function(resolve, reject) { | ||
var rss; | ||
var req = request(url, {timeout: 10000, pool: false}); | ||
var req = request(url, { | ||
timeout: 10000, | ||
pool: false | ||
}); | ||
req.setMaxListeners(50); | ||
// Some feeds do not response without user-agent and accept headers. | ||
req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36') | ||
req.setHeader('user-agent', | ||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36' | ||
) | ||
req.setHeader('accept', 'text/html,application/xhtml+xml'); | ||
var feedparser = new FeedParser(); | ||
req.on('error', reject); | ||
req.on('response', function (res) { | ||
req.on('response', function(res) { | ||
var stream = this; | ||
@@ -123,5 +137,5 @@ if (res.statusCode !== 200) { | ||
//charset = getParams(res.headers['content-type'] || '').charset; | ||
// stream.pipe(feedparser); | ||
// stream.pipe(feedparser); | ||
}).pipe(es.through(function(data) { | ||
//get charset from <?xml version="1.0" encoding="gb2312"?><rss version="2.0"> | ||
@@ -133,3 +147,3 @@ //then convert gb2312,gbk,big5 etc to utf-8 | ||
var meta = result.match(/<\?(.*?)\?>/g); | ||
if(meta !== null){ | ||
if (meta !== null) { | ||
meta = meta[0].toString().match(/encoding="(.*?)"\?>/g); | ||
@@ -140,8 +154,8 @@ encoding = meta.toString().split('"')[1]; | ||
//iconv-lite , which can support windows | ||
result = iconv.decode(data,encoding); | ||
result = iconv.decode(data, encoding); | ||
this.emit('data', result); | ||
})).pipe(feedparser); | ||
feedparser.on('error', reject); | ||
feedparser.on('end', function (err) { | ||
feedparser.on('end', function(err) { | ||
if (err) { | ||
@@ -152,7 +166,7 @@ reject(err); | ||
}); | ||
feedparser.on('readable', function () { | ||
feedparser.on('readable', function() { | ||
var post; | ||
if (post = this.read()) { | ||
rss = _.pick(post.meta, options); | ||
rss.feedurl = url; //rss 的url | ||
rss.feedurl = url; //rss 的url | ||
resolve(rss); | ||
@@ -163,18 +177,20 @@ } | ||
} | ||
// /** | ||
// * get all post's body content by post list | ||
// * @param posts | ||
// * @returns {*|Promise} | ||
// */ | ||
// function fetchAllContent(posts) { | ||
// return Promise.reduce(posts, function(total, post) { | ||
// return getCleanBody(post.link).then(function(article) { | ||
// post.content = article.content ? article.content : post.description || | ||
// post.summary; | ||
// return post; | ||
// }); | ||
// }, []).then(function(total) { | ||
// return posts; | ||
// }); | ||
// } | ||
/** | ||
* get all post's body content by post list | ||
* @param posts | ||
* @returns {*|Promise} | ||
*/ | ||
function fetchAllContent(posts) { | ||
return Promise.reduce(posts, function (total, post) { | ||
return getCleanBody(post.link).then(function (article) { | ||
post.content = article.content ? article.content : post.description || post.summary; | ||
return post; | ||
}); | ||
}, []).then(function (total) { | ||
return posts; | ||
}); | ||
} | ||
/** | ||
* get all content and rss post by rssUrl | ||
@@ -184,28 +200,29 @@ * @param url | ||
*/ | ||
function getAllByUrl(url) { | ||
return fetchRss(url).then(function (posts) { | ||
return fetchAllContent(posts); | ||
}); | ||
} | ||
/** | ||
* get body content by link | ||
* @param link | ||
* @returns {Promise} | ||
*/ | ||
function getCleanBody(link) { | ||
return new Promise(function (resolve, reject) { | ||
read(link, function (err, article, meta) { | ||
if (err) { | ||
reject(err); | ||
} | ||
resolve(article); | ||
}); | ||
}); | ||
} | ||
// function getAllByUrl(url) { | ||
// return fetchRss(url).then(function(posts) { | ||
// return fetchAllContent(posts); | ||
// }); | ||
// } | ||
// /** | ||
// * get body content by link | ||
// * @param link | ||
// * @returns {Promise} | ||
// */ | ||
// function getCleanBody(link) { | ||
// return new Promise(function (resolve, reject) { | ||
// read(link, function (err, article, meta) { | ||
// if (err) { | ||
// reject(err); | ||
// } | ||
// resolve(article); | ||
// }); | ||
// }); | ||
// } | ||
module.exports = { | ||
fetchRss: fetchRss, | ||
siteInfo: siteInfo, | ||
fetchAllContent: fetchAllContent, | ||
getCleanBody: getCleanBody, | ||
getAllByUrl: getAllByUrl | ||
//fetchAllContent: fetchAllContent, | ||
//getCleanBody: getCleanBody, | ||
//getAllByUrl: getAllByUrl | ||
}; |
{ | ||
"name": "rssspider", | ||
"version": "1.2.0", | ||
"version": "1.3.0", | ||
"author": "Liu Xing <shanelau1021@gmail.com> (http://kissliux.github.io)", | ||
@@ -19,3 +19,3 @@ "description": "The simplest way to use rssspide to fetch rss list and site info. Fetch post'content ,give clean view to you. ", | ||
"dependencies": { | ||
"bluebird": "^2.3.2", | ||
"bluebird": "^2.10.2", | ||
"event-stream": "^3.3.2", | ||
@@ -25,4 +25,2 @@ "feedparser": "^0.19.2", | ||
"lodash": "^2.4.1", | ||
"node-readability": "^1.0.2", | ||
"readability": "^0.1.0", | ||
"request": "^2.45.0" | ||
@@ -29,0 +27,0 @@ }, |
@@ -91,2 +91,3 @@ # rssSpider | ||
** 以下功能在 1.2.0 才能使用, readability 的库支持不是很好 ** | ||
@@ -156,3 +157,3 @@ ### 3. `getCleanBody(url)` | ||
## updrade | ||
## upgrade | ||
Add node 4.x support | ||
@@ -159,0 +160,0 @@ |
@@ -23,18 +23,1 @@ /** | ||
}; | ||
exports.getAllByUrl = function(test) { | ||
spide.getAllByUrl(url).then(function(data) { | ||
test.ok(data.length > 0, "this assertion should pass"); | ||
test.done(); | ||
}); | ||
}; | ||
exports.getPostContent = function(test) { | ||
var url = 'http://www.bigertech.com/post/the-art-of-mfc/'; | ||
spide.getCleanBody(url).then(function(data) { | ||
test.ok(data.title != '', "this assertion should pass"); | ||
test.done(); | ||
}).catch(function(err) { | ||
console.error(err); | ||
}); | ||
}; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
6
163
10861
- Removed node-readability@^1.0.2
- Removed readability@^0.1.0
- Removed asn1@0.1.11 (transitive)
- Removed assert-plus@0.1.5 (transitive)
- Removed async@0.9.2 (transitive)
- Removed aws-sign2@0.5.0 (transitive)
- Removed bindings@1.5.0 (transitive)
- Removed boom@0.4.2 (transitive)
- Removed browser-request@0.3.3 (transitive)
- Removed combined-stream@0.0.7 (transitive)
- Removed contextify@0.1.15 (transitive)
- Removed cryptiles@0.2.2 (transitive)
- Removed cssom@0.3.8 (transitive)
- Removed cssstyle@0.2.37 (transitive)
- Removed ctype@0.5.3 (transitive)
- Removed delayed-stream@0.0.5 (transitive)
- Removed dom-serializer@0.2.2 (transitive)
- Removed domelementtype@1.3.1, 2.3.0 (transitive)
- Removed domhandler@2.4.2 (transitive)
- Removed domutils@1.7.0 (transitive)
- Removed encoding@0.1.13 (transitive)
- Removed entities@1.1.2, 2.2.0 (transitive)
- Removed file-uri-to-path@1.0.0 (transitive)
- Removed forever-agent@0.5.2 (transitive)
- Removed form-data@0.1.4 (transitive)
- Removed hawk@1.1.1 (transitive)
- Removed hoek@0.9.1 (transitive)
- Removed htmlparser@1.7.7 (transitive)
- Removed htmlparser2@3.10.1 (transitive)
- Removed http-signature@0.10.1 (transitive)
- Removed iconv-lite@0.6.3 (transitive)
- Removed jsdom@1.5.0 (transitive)
- Removed mime@1.2.11 (transitive)
- Removed mime-types@1.0.2 (transitive)
- Removed mjsunit.runner@0.1.3 (transitive)
- Removed nan@2.22.0 (transitive)
- Removed node-readability@1.0.2 (transitive)
- Removed node-uuid@1.4.8 (transitive)
- Removed nwmatcher@1.4.4 (transitive)
- Removed oauth-sign@0.3.0 (transitive)
- Removed parse5@1.5.1 (transitive)
- Removed qs@1.0.2 (transitive)
- Removed readability@0.1.0 (transitive)
- Removed readable-stream@3.6.2 (transitive)
- Removed request@2.40.0 (transitive)
- Removed sntp@0.2.4 (transitive)
- Removed string_decoder@1.3.0 (transitive)
- Removed stringstream@0.0.6 (transitive)
- Removed tldts@6.1.64 (transitive)
- Removed tldts-core@6.1.64 (transitive)
- Removed tough-cookie@5.0.0 (transitive)
- Removed tunnel-agent@0.4.3 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
- Removed xmlhttprequest@1.8.0 (transitive)
Updated bluebird@^2.10.2