rss-parser
Advanced tools
Comparing version 2.9.0 to 2.10.0
140
index.js
@@ -0,1 +1,3 @@ | ||
'use strict'; | ||
var Entities = require("entities"); | ||
@@ -12,2 +14,8 @@ var FS = require('fs'); | ||
var FEED_FIELDS = [ | ||
['author', 'creator'], | ||
['dc:publisher', 'publisher'], | ||
['dc:creator', 'creator'], | ||
['dc:source', 'source'], | ||
['dc:title', 'title'], | ||
['dc:type', 'type'], | ||
'title', | ||
@@ -20,11 +28,13 @@ 'description', | ||
'generator', | ||
'link' | ||
'link', | ||
]; | ||
var PODCAST_TOP_FIELDS = [ | ||
'author', | ||
'subtitle', | ||
'summary', | ||
'explicit' | ||
]; | ||
var ITEM_FIELDS = [ | ||
['author', 'creator'], | ||
['dc:creator', 'creator'], | ||
['dc:date', 'date'], | ||
['dc:language', 'language'], | ||
['dc:rights', 'rights'], | ||
['dc:source', 'source'], | ||
['dc:title', 'title'], | ||
'title', | ||
@@ -37,12 +47,24 @@ 'link', | ||
'dc:creator', | ||
'dc:date' | ||
'dc:date', | ||
]; | ||
var PODCAST_ITEM_FIELDS = [ | ||
var mapItunesField = function(f) { | ||
return ['itunes:' + f, f]; | ||
} | ||
var PODCAST_FEED_FIELDS = ([ | ||
'author', | ||
'subtitle', | ||
'summary', | ||
'explicit' | ||
]).map(mapItunesField); | ||
var PODCAST_ITEM_FIELDS = ([ | ||
'author', | ||
'subtitle', | ||
'summary', | ||
'explicit', | ||
'duration', | ||
'image' | ||
]; | ||
]).map(mapItunesField); | ||
@@ -69,3 +91,3 @@ | ||
var parseAtomFeed = function(xmlObj, callback) { | ||
var parseAtomFeed = function(xmlObj, options, callback) { | ||
var feed = xmlObj.feed; | ||
@@ -105,8 +127,23 @@ var json = {feed: {entries: []}}; | ||
var parseRSS1 = function(xmlObj, callback) { | ||
callback("RSS 1.0 parsing not yet implemented.") | ||
var parseRSS1 = function(xmlObj, options, callback) { | ||
xmlObj = xmlObj['rdf:RDF']; | ||
var channel = xmlObj.channel[0]; | ||
var items = xmlObj.item; | ||
return parseRSS(channel, items, options, callback); | ||
} | ||
var parseRSS2 = function(xmlObj, options, callback) { | ||
var channel = xmlObj.rss.channel[0]; | ||
var items = channel.item; | ||
return parseRSS(channel, items, options, function(err, data) { | ||
if (err) return callback(err); | ||
if (xmlObj.rss.$['xmlns:itunes']) { | ||
decorateItunes(data, channel); | ||
} | ||
callback(null, data); | ||
}); | ||
} | ||
var parseRSS = function(channel, items, options, callback) { | ||
items = items || []; | ||
options.customFields = options.customFields || {}; | ||
@@ -117,14 +154,8 @@ var itemFields = ITEM_FIELDS.concat(options.customFields.item || []); | ||
var json = {feed: {entries: []}}; | ||
var channel = xmlObj.rss.channel[0]; | ||
if (channel['atom:link']) json.feed.feedUrl = channel['atom:link'][0].$.href; | ||
feedFields.forEach(function(f) { | ||
if (channel[f]) json.feed[f] = channel[f][0]; | ||
}) | ||
var items = channel.item; | ||
(items || []).forEach(function(item) { | ||
copyFromXML(channel, json.feed, feedFields); | ||
items.forEach(function(item) { | ||
var entry = {}; | ||
itemFields.forEach(function(f) { | ||
if (item[f]) entry[f] = item[f][0]; | ||
}) | ||
copyFromXML(item, entry, itemFields); | ||
if (item.enclosure) { | ||
@@ -142,10 +173,27 @@ entry.enclosure = item.enclosure[0].$; | ||
if (item.category) entry.categories = item.category; | ||
var date = entry.pubDate || entry.date; | ||
if (date) { | ||
try { | ||
entry.isoDate = new Date(date.trim()).toISOString(); | ||
} catch (e) { | ||
// Ignore bad date format | ||
} | ||
} | ||
json.feed.entries.push(entry); | ||
}) | ||
if (xmlObj.rss.$['xmlns:itunes']) { | ||
decorateItunes(json, channel); | ||
} | ||
callback(null, json); | ||
} | ||
var copyFromXML = function(xml, dest, fields) { | ||
fields.forEach(function(f) { | ||
var from = f; | ||
var to = f; | ||
if (Array.isArray(f)) { | ||
from = f[0]; | ||
to = f[1]; | ||
} | ||
if (xml[from] !== undefined) dest[to] = xml[from][0]; | ||
}) | ||
} | ||
/** | ||
@@ -183,35 +231,27 @@ * Add iTunes specific fields from XML to extracted JSON | ||
PODCAST_TOP_FIELDS.forEach(function(f) { | ||
if (channel['itunes:' + f]) json.feed.itunes[f] = channel['itunes:' + f][0]; | ||
copyFromXML(channel, json.feed.itunes, PODCAST_FEED_FIELDS); | ||
items.forEach(function(item, index) { | ||
var entry = json.feed.entries[index]; | ||
entry.itunes = {}; | ||
copyFromXML(item, entry.itunes, PODCAST_ITEM_FIELDS); | ||
var image = item['itunes:image']; | ||
if (image && image[0] && image[0].$ && image[0].$.href) { | ||
entry.itunes.image = image[0].$.href; | ||
} | ||
}); | ||
(items).forEach(function(item, index) { | ||
entry = json.feed.entries[index]; | ||
PODCAST_ITEM_FIELDS.forEach(function(f) { | ||
entry.itunes = entry.itunes || {}; | ||
if (item['itunes:' + f]) { | ||
if (f == 'image' && item['itunes:' + f][0].$ && item['itunes:' + f][0].$.href) { | ||
entry.itunes[f] = item['itunes:' + f][0].$.href; | ||
} else { | ||
entry.itunes[f] = item['itunes:' + f][0]; | ||
} | ||
} | ||
}); | ||
json.feed.entries[index] = entry; | ||
}); | ||
} | ||
Parser.parseString = function(xml, settings, callback) { | ||
Parser.parseString = function(xml, options, callback) { | ||
if (!callback) { | ||
callback = settings; | ||
settings = {}; | ||
callback = options; | ||
options = {}; | ||
} | ||
XML2JS.parseString(xml, function(err, result) { | ||
if (err) return callback(err); | ||
if (result.feed) { | ||
return parseAtomFeed(result, callback) | ||
return parseAtomFeed(result, options, callback) | ||
} else if (result.rss && result.rss.$.version && result.rss.$.version.indexOf('2') === 0) { | ||
return parseRSS2(result, settings, callback); | ||
return parseRSS2(result, options, callback); | ||
} else { | ||
return parseRSS1(result, callback); | ||
return parseRSS1(result, options, callback); | ||
} | ||
@@ -256,3 +296,3 @@ }); | ||
Parser.parseFile = function(file,options,callback) { | ||
Parser.parseFile = function(file, options, callback) { | ||
FS.readFile(file, 'utf8', function(err, contents) { | ||
@@ -259,0 +299,0 @@ return Parser.parseString(contents, options, callback); |
{ | ||
"name": "rss-parser", | ||
"version": "2.9.0", | ||
"version": "2.10.0", | ||
"main": "index.js", | ||
@@ -5,0 +5,0 @@ "scripts": { |
@@ -18,4 +18,10 @@ # rss-parser | ||
Check out the output format in [test/output/reddit.json](test/output/reddit.json) | ||
## Output | ||
Check out the full output format in [test/output/reddit.json](test/output/reddit.json) | ||
##### Notes: | ||
* The `dc:` prefix will be removed from all fields | ||
* Both `dc:date` and `pubDate` will be available in ISO 8601 format as `isoDate` | ||
* If `author` is specified, but not `dc:creator`, `creator` will be set to `author` ([see article](http://www.lowter.com/blogs/2008/2/9/rss-dccreator-author)) | ||
### NodeJS | ||
@@ -56,2 +62,4 @@ ```js | ||
### Custom Fields | ||
If your RSS feed contains fields that aren't currently returned, you can access them using the `customFields` option. | ||
```js | ||
@@ -73,2 +81,14 @@ var options = { | ||
To rename fields, you can pass in an array with two items, in the format `[fromField, toField]`: | ||
```js | ||
var options = { | ||
customFields: { | ||
item: [ | ||
['dc:coAuthor', 'coAuthor'], | ||
] | ||
} | ||
} | ||
``` | ||
## Contributing | ||
@@ -75,0 +95,0 @@ Contributions welcome! |
@@ -5,2 +5,4 @@ { | ||
{ | ||
"creator": "tobi", | ||
"date": "2016-05-04T06:53:45Z", | ||
"title": "My first Instant Article", | ||
@@ -14,3 +16,4 @@ "link": "https://localhost:8000", | ||
"contentSnippet": "Lorem ipsum", | ||
"guid": "https://localhost:8000" | ||
"guid": "https://localhost:8000", | ||
"isoDate": "2016-05-04T06:53:45.000Z" | ||
} | ||
@@ -17,0 +20,0 @@ ], |
@@ -5,2 +5,3 @@ { | ||
{ | ||
"creator": "foobar@gmail.com", | ||
"title": "FAQ for Narro", | ||
@@ -18,2 +19,3 @@ "link": "https://www.narro.co/article/54e703933058540300000069", | ||
"guid": "https://www.narro.co/article/54e703933058540300000069", | ||
"isoDate": "2015-02-20T09:51:15.000Z", | ||
"itunes": { | ||
@@ -20,0 +22,0 @@ "author": "foobar@gmail.com", |
@@ -13,3 +13,4 @@ { | ||
"space" | ||
] | ||
], | ||
"isoDate": "2016-01-23T15:40:37.000Z" | ||
}, | ||
@@ -25,3 +26,4 @@ { | ||
"pics" | ||
] | ||
], | ||
"isoDate": "2016-01-23T16:36:05.000Z" | ||
}, | ||
@@ -37,3 +39,4 @@ { | ||
"funny" | ||
] | ||
], | ||
"isoDate": "2016-01-23T15:22:27.000Z" | ||
}, | ||
@@ -49,3 +52,4 @@ { | ||
"gaming" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:51:51.000Z" | ||
}, | ||
@@ -61,3 +65,4 @@ { | ||
"news" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:49:30.000Z" | ||
}, | ||
@@ -73,3 +78,4 @@ { | ||
"gifs" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:32:28.000Z" | ||
}, | ||
@@ -85,3 +91,4 @@ { | ||
"worldnews" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:05:46.000Z" | ||
}, | ||
@@ -97,3 +104,4 @@ { | ||
"aww" | ||
] | ||
], | ||
"isoDate": "2016-01-23T13:33:58.000Z" | ||
}, | ||
@@ -109,3 +117,4 @@ { | ||
"todayilearned" | ||
] | ||
], | ||
"isoDate": "2016-01-23T13:55:20.000Z" | ||
}, | ||
@@ -121,3 +130,4 @@ { | ||
"food" | ||
] | ||
], | ||
"isoDate": "2016-01-23T15:29:25.000Z" | ||
}, | ||
@@ -133,3 +143,4 @@ { | ||
"OldSchoolCool" | ||
] | ||
], | ||
"isoDate": "2016-01-23T13:45:28.000Z" | ||
}, | ||
@@ -145,3 +156,4 @@ { | ||
"mildlyinteresting" | ||
] | ||
], | ||
"isoDate": "2016-01-23T12:48:32.000Z" | ||
}, | ||
@@ -157,3 +169,4 @@ { | ||
"Showerthoughts" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:03:51.000Z" | ||
}, | ||
@@ -169,3 +182,4 @@ { | ||
"UpliftingNews" | ||
] | ||
], | ||
"isoDate": "2016-01-23T16:22:41.000Z" | ||
}, | ||
@@ -181,3 +195,4 @@ { | ||
"AskReddit" | ||
] | ||
], | ||
"isoDate": "2016-01-23T13:11:40.000Z" | ||
}, | ||
@@ -193,3 +208,4 @@ { | ||
"videos" | ||
] | ||
], | ||
"isoDate": "2016-01-23T15:09:31.000Z" | ||
}, | ||
@@ -205,3 +221,4 @@ { | ||
"IAmA" | ||
] | ||
], | ||
"isoDate": "2016-01-23T18:08:59.000Z" | ||
}, | ||
@@ -217,3 +234,4 @@ { | ||
"EarthPorn" | ||
] | ||
], | ||
"isoDate": "2016-01-23T13:20:30.000Z" | ||
}, | ||
@@ -229,3 +247,4 @@ { | ||
"science" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:16:52.000Z" | ||
}, | ||
@@ -241,3 +260,4 @@ { | ||
"Art" | ||
] | ||
], | ||
"isoDate": "2016-01-23T13:11:18.000Z" | ||
}, | ||
@@ -253,3 +273,4 @@ { | ||
"books" | ||
] | ||
], | ||
"isoDate": "2016-01-23T15:37:45.000Z" | ||
}, | ||
@@ -265,3 +286,4 @@ { | ||
"tifu" | ||
] | ||
], | ||
"isoDate": "2016-01-23T17:58:36.000Z" | ||
}, | ||
@@ -277,3 +299,4 @@ { | ||
"Jokes" | ||
] | ||
], | ||
"isoDate": "2016-01-23T11:31:15.000Z" | ||
}, | ||
@@ -289,3 +312,4 @@ { | ||
"sports" | ||
] | ||
], | ||
"isoDate": "2016-01-23T14:11:56.000Z" | ||
}, | ||
@@ -301,3 +325,4 @@ { | ||
"explainlikeimfive" | ||
] | ||
], | ||
"isoDate": "2016-01-23T16:28:07.000Z" | ||
} | ||
@@ -304,0 +329,0 @@ ], |
@@ -13,3 +13,4 @@ { | ||
"funny" | ||
] | ||
], | ||
"isoDate": "2015-11-12T21:16:39.000Z" | ||
}, | ||
@@ -25,3 +26,4 @@ { | ||
"IAmA" | ||
] | ||
], | ||
"isoDate": "2015-11-12T22:27:28.000Z" | ||
}, | ||
@@ -37,3 +39,4 @@ { | ||
"mildlyinteresting" | ||
] | ||
], | ||
"isoDate": "2015-11-12T19:56:09.000Z" | ||
}, | ||
@@ -49,3 +52,4 @@ { | ||
"videos" | ||
] | ||
], | ||
"isoDate": "2015-11-12T20:06:37.000Z" | ||
}, | ||
@@ -61,3 +65,4 @@ { | ||
"pics" | ||
] | ||
], | ||
"isoDate": "2015-11-12T18:28:28.000Z" | ||
}, | ||
@@ -73,3 +78,4 @@ { | ||
"todayilearned" | ||
] | ||
], | ||
"isoDate": "2015-11-12T18:51:34.000Z" | ||
}, | ||
@@ -85,3 +91,4 @@ { | ||
"gaming" | ||
] | ||
], | ||
"isoDate": "2015-11-12T18:52:48.000Z" | ||
}, | ||
@@ -97,3 +104,4 @@ { | ||
"gifs" | ||
] | ||
], | ||
"isoDate": "2015-11-12T22:12:13.000Z" | ||
}, | ||
@@ -109,3 +117,4 @@ { | ||
"creepy" | ||
] | ||
], | ||
"isoDate": "2015-11-12T18:28:27.000Z" | ||
}, | ||
@@ -121,3 +130,4 @@ { | ||
"Music" | ||
] | ||
], | ||
"isoDate": "2015-11-12T19:32:50.000Z" | ||
}, | ||
@@ -133,3 +143,4 @@ { | ||
"news" | ||
] | ||
], | ||
"isoDate": "2015-11-12T20:46:38.000Z" | ||
}, | ||
@@ -145,3 +156,4 @@ { | ||
"explainlikeimfive" | ||
] | ||
], | ||
"isoDate": "2015-11-12T18:03:47.000Z" | ||
}, | ||
@@ -157,3 +169,4 @@ { | ||
"worldnews" | ||
] | ||
], | ||
"isoDate": "2015-11-12T20:20:15.000Z" | ||
}, | ||
@@ -169,3 +182,4 @@ { | ||
"movies" | ||
] | ||
], | ||
"isoDate": "2015-11-12T15:51:12.000Z" | ||
}, | ||
@@ -181,3 +195,4 @@ { | ||
"aww" | ||
] | ||
], | ||
"isoDate": "2015-11-12T15:51:37.000Z" | ||
}, | ||
@@ -193,3 +208,4 @@ { | ||
"food" | ||
] | ||
], | ||
"isoDate": "2015-11-12T15:03:03.000Z" | ||
}, | ||
@@ -205,3 +221,4 @@ { | ||
"LifeProTips" | ||
] | ||
], | ||
"isoDate": "2015-11-12T19:01:25.000Z" | ||
}, | ||
@@ -217,3 +234,4 @@ { | ||
"Jokes" | ||
] | ||
], | ||
"isoDate": "2015-11-12T15:54:22.000Z" | ||
}, | ||
@@ -229,3 +247,4 @@ { | ||
"sports" | ||
] | ||
], | ||
"isoDate": "2015-11-12T15:09:25.000Z" | ||
}, | ||
@@ -241,3 +260,4 @@ { | ||
"television" | ||
] | ||
], | ||
"isoDate": "2015-11-12T13:42:34.000Z" | ||
}, | ||
@@ -253,3 +273,4 @@ { | ||
"Showerthoughts" | ||
] | ||
], | ||
"isoDate": "2015-11-12T17:40:46.000Z" | ||
}, | ||
@@ -265,3 +286,4 @@ { | ||
"photoshopbattles" | ||
] | ||
], | ||
"isoDate": "2015-11-12T14:23:21.000Z" | ||
}, | ||
@@ -277,3 +299,4 @@ { | ||
"OldSchoolCool" | ||
] | ||
], | ||
"isoDate": "2015-11-12T17:36:21.000Z" | ||
}, | ||
@@ -289,3 +312,4 @@ { | ||
"GetMotivated" | ||
] | ||
], | ||
"isoDate": "2015-11-12T15:09:19.000Z" | ||
}, | ||
@@ -301,3 +325,4 @@ { | ||
"EarthPorn" | ||
] | ||
], | ||
"isoDate": "2015-11-12T12:43:37.000Z" | ||
} | ||
@@ -304,0 +329,0 @@ ], |
@@ -32,9 +32,10 @@ var FS = require('fs'); | ||
it('should not parse craigslist (RSS 1.0)', function(done) { | ||
Parser.parseFile(__dirname + '/input/craigslist.rss', function(err, parsed) { | ||
Expect(err.indexOf('not yet implemented')).to.not.equal(-1); | ||
done(); | ||
}) | ||
it('should parse sciencemag.org (RSS 1.0)', function(done) { | ||
testParseForFile('rss-1', 'rss', done); | ||
}) | ||
it('should parse craigslist (RSS 1.0)', function(done) { | ||
testParseForFile('craigslist', 'rss', done); | ||
}) | ||
it('should parse atom', function(done) { | ||
@@ -74,3 +75,3 @@ testParseForFile('reddit-atom', 'rss', done); | ||
customFields: { | ||
feed: ['language', 'copyright'], | ||
feed: ['language', 'copyright', 'nested-field'], | ||
item: ['subtitle'] | ||
@@ -77,0 +78,0 @@ } |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
1594863
34
18682
114