feedparser
Advanced tools
Comparing version 0.4.6 to 0.9.0
@@ -17,23 +17,426 @@ /********************************************************************** | ||
, util = require('util') | ||
, events = require('events'); | ||
, events = require('events') | ||
, utils = require('./utils') | ||
, getValue = utils.getValue; | ||
// Ensures we have .trim() to strip leading and trailing whitespace from any string | ||
if (!String.prototype.trim) { | ||
String.prototype.trim = function () { | ||
var str = this.replace(/^\s\s*/, ''); | ||
var ws = /\s/ | ||
, i = str.length; | ||
while (ws.test(str.charAt(--i))); | ||
return str.slice(0, i + 1); | ||
}; | ||
/**
 * Build the normalized feed-level metadata object from a parsed channel/feed node.
 *
 * Well-known elements are mapped onto a fixed set of properties (title,
 * description, date, pubDate, link, xmlUrl, author, language, image, favicon,
 * copyright, generator, categories). Every element whose name does not start
 * with '#' is additionally copied verbatim onto the result, keyed by its own
 * name when that name contains ':' and by `type + ':' + name` otherwise.
 *
 * @param {Object} node  Parsed element; children are keyed by element name,
 *                       with attributes under '@' and text under '#'
 * @param {String} type  Feed type; selects category handling ('atom' / 'rss')
 *                       and prefixes the copied non-namespaced properties
 * @returns {Object} The metadata object, or {} when either argument is missing
 */
function handleMeta (node, type) {
  if (!type || !node) return {};

  var meta = {};
  ['title','description','date','pubDate','link','xmlUrl','author','language','favicon','copyright','generator'].forEach(function (property){
    meta[property] = null;
  });
  meta.image = {};
  meta.categories = [];

  // Treat a lone element and a list of repeated elements uniformly
  function asList (value) {
    return Array.isArray(value) ? value : [value];
  }

  // Attribute bag of an element, or an empty object when it carries none
  function attrsOf (element) {
    return (element && element['@']) || {};
  }

  // Split a delimited category string and trim each part; no text yields no categories
  function splitCategories (text, separator) {
    if (typeof text !== 'string') return [];
    return text.split(separator).map(function (cat){ return cat.trim(); });
  }

  // Collect the category labels carried by a single category-like element
  function categoriesOf (category, name) {
    var found = [];
    if ('category' === name && 'atom' === type) {
      var term = getValue(attrsOf(category), 'term');
      if (term) found.push(term);
    } else if ('category' === name && 'rss' === type) {
      found = splitCategories(getValue(category), ',');
    } else if ('dc:subject' === name) {
      found = splitCategories(getValue(category), ' ');
    } else if ('itunes:category' === name) {
      var parent = getValue(attrsOf(category), 'text');
      if (category && category[name]) {
        // Nested subcategories are reported as "Parent/Child"
        asList(category[name]).forEach(function (subcategory){
          var child = getValue(attrsOf(subcategory), 'text');
          if (child) found.push(parent ? parent + '/' + child : child);
        });
      } else if (parent) {
        found.push(parent);
      }
    } else if ('media:category' === name) {
      var label = getValue(category);
      if (label) found.push(label);
    }
    return found;
  }

  // Record one <link>: Atom links carry href/rel attributes, RSS links carry text only
  function applyLink (link) {
    var attrs = attrsOf(link);
    if (attrs['href']) { // Atom
      var rel = getValue(attrs, 'rel');
      if (!rel || rel == 'alternate') meta.link = attrs['href'];
      else if (rel == 'self') meta.xmlUrl = attrs['href'];
    } else if (Object.keys(attrs).length === 0) { // RSS
      if (!meta.link) meta.link = getValue(link);
    }
  }

  // Remove duplicates; prefers Array.prototype.unique when the host has installed it
  // (presumably done by ./utils -- confirm), otherwise keeps first occurrences in order
  function dedupe (list) {
    if (typeof list.unique === 'function') return list.unique();
    return list.filter(function (value, index){ return list.indexOf(value) === index; });
  }

  Object.keys(node).forEach(function (name){
    var el = node[name];

    switch (name) {
    case 'title':
      meta.title = getValue(el);
      break;
    case 'description':
    case 'subtitle':
      meta.description = getValue(el);
      break;
    case 'pubdate':
    case 'lastbuilddate':
    case 'published':
    case 'modified':
    case 'updated':
    case 'dc:date':
      var stamp = getValue(el);
      var date = stamp ? new Date(stamp) : null;
      // An unparseable string yields an Invalid Date, which is truthy; skip it as well
      if (!date || isNaN(date.getTime())) break;
      if (meta.pubDate === null || name == 'pubdate' || name == 'published')
        meta.pubDate = date;
      if (meta.date === null || name == 'lastbuilddate' || name == 'modified' || name == 'updated')
        meta.date = date;
      break;
    case 'link':
    case 'atom:link':
    case 'atom10:link':
      asList(el).forEach(applyLink);
      break;
    case 'managingeditor':
    case 'webmaster':
    case 'author':
      // Structured (Atom-style) authors win; otherwise fall back to the element text
      // instead of wiping a plain-text author with null
      var person = name == 'author'
        ? (getValue(el.name) || getValue(el.email) || getValue(el.uri))
        : null;
      if (person)
        meta.author = person;
      else if (meta.author === null || name == 'managingeditor')
        meta.author = getValue(el);
      break;
    case 'language':
      meta.language = getValue(el);
      break;
    case 'image':
    case 'logo':
      if (el.url)
        meta.image.url = getValue(el.url);
      if (el.title)
        meta.image.title = getValue(el.title);
      // Text-only form (e.g. Atom <logo>): use it only when no <url> child supplied one
      if (!meta.image.url && getValue(el))
        meta.image.url = getValue(el);
      break;
    case 'icon':
      meta.favicon = getValue(el);
      break;
    case 'copyright':
    case 'rights':
    case 'dc:rights':
      meta.copyright = getValue(el);
      break;
    case 'generator':
      // Start from '' so that appending never produces "null..." strings
      var generator = getValue(el) || '';
      var generatorAttrs = attrsOf(el);
      if (generatorAttrs.version)
        generator += (generator ? ' ' : '') + 'v' + generatorAttrs.version;
      if (generatorAttrs.uri)
        generator += generator ? ' (' + generatorAttrs.uri + ')' : generatorAttrs.uri;
      meta.generator = generator || null;
      break;
    case 'category':
    case 'dc:subject':
    case 'itunes:category':
    case 'media:category':
      /* All kinds of categories are handled inside the switch because meta.categories
       * is an array, unlike the other properties, and can therefore hold multiple values
       */
      asList(el).forEach(function (category){
        meta.categories = meta.categories.concat(categoriesOf(category, name));
      });
      break;
    } // switch end

    // Fill with all native other namespaced properties
    if (name.indexOf('#') !== 0) {
      if (~name.indexOf(':')) meta[name] = el;
      else meta[type + ':' + name] = el;
    }
  }); // forEach end

  // Fallbacks for properties the main pass left empty
  if (!meta.description) {
    if (node['itunes:summary']) meta.description = getValue(node['itunes:summary']);
    else if (node['tagline']) meta.description = getValue(node['tagline']);
  }
  if (!meta.author) {
    if (node['itunes:author']) meta.author = getValue(node['itunes:author']);
    else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) meta.author = getValue(node['itunes:owner']['itunes:name']);
    else if (node['dc:creator']) meta.author = getValue(node['dc:creator']);
    else if (node['dc:publisher']) meta.author = getValue(node['dc:publisher']);
  }
  if (!meta.language) {
    // getValue tolerates a node without an attribute bag
    var xmlLang = getValue(node['@'], 'xml:lang');
    if (xmlLang) meta.language = xmlLang;
    else if (node['dc:language']) meta.language = getValue(node['dc:language']);
  }
  if (!meta.image.url) {
    if (node['itunes:image']) meta.image.url = getValue(node['itunes:image']['@'], 'href');
    else if (node['media:thumbnail']) meta.image.url = getValue(node['media:thumbnail']['@'], 'url');
  }
  if (!meta.copyright) {
    if (node['media:copyright']) meta.copyright = getValue(node['media:copyright']);
    else if (node['dc:rights']) meta.copyright = getValue(node['dc:rights']);
    else if (node['creativecommons:license']) meta.copyright = getValue(node['creativecommons:license']);
    else if (node['cc:license'] && getValue(node['cc:license']['@'], 'rdf:resource')) meta.copyright = getValue(node['cc:license']['@'], 'rdf:resource');
  }
  if (!meta.generator) {
    if (node['admin:generatoragent'] && getValue(node['admin:generatoragent']['@'], 'rdf:resource')) meta.generator = getValue(node['admin:generatoragent']['@'], 'rdf:resource');
  }

  if (meta.categories.length)
    meta.categories = dedupe(meta.categories);

  return meta;
}
// Utility function to test for and extract a subkey | ||
function getValue(obj, subkey) { | ||
if (!subkey) | ||
subkey = '#'; | ||
if (obj && obj[subkey]) | ||
return obj[subkey]; | ||
else | ||
return null; | ||
/**
 * Build the normalized article object from a parsed item/entry node.
 *
 * Well-known elements are mapped onto a fixed set of properties (title,
 * description, summary, date, pubDate, link, guid, author, comments, origlink,
 * image, source, categories, enclosures). Every element whose name does not
 * start with '#' is additionally copied verbatim onto the result, keyed by its
 * own name when that name contains ':' and by `type + ':' + name` otherwise.
 *
 * @param {Object} node  Parsed element; children are keyed by element name,
 *                       with attributes under '@' and text under '#'
 * @param {String} type  Feed type; selects source/category handling
 *                       ('atom' / 'rss') and prefixes the copied properties
 * @returns {Object} The article object, or {} when either argument is missing
 */
function handleItem (node, type){
  if (!type || !node) return {};

  var item = {};
  ['title','description','summary','date','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){
    item[property] = null;
  });
  item.image = {};
  item.source = {};
  item.categories = [];
  item.enclosures = [];

  // Treat a lone element and a list of repeated elements uniformly
  function asList (value) {
    return Array.isArray(value) ? value : [value];
  }

  // Attribute bag of an element, or an empty object when it carries none
  function attrsOf (element) {
    return (element && element['@']) || {};
  }

  // Split a delimited category string and trim each part; no text yields no categories
  function splitCategories (text, separator) {
    if (typeof text !== 'string') return [];
    return text.split(separator).map(function (cat){ return cat.trim(); });
  }

  // Collect the category labels carried by a single category-like element
  function categoriesOf (category, name) {
    var found = [];
    if ('category' === name && 'atom' === type) {
      var term = getValue(attrsOf(category), 'term');
      if (term) found.push(term);
    } else if ('category' === name && 'rss' === type) {
      found = splitCategories(getValue(category), ',');
    } else if ('dc:subject' === name) {
      found = splitCategories(getValue(category), ' ');
    } else if ('itunes:category' === name) {
      var parent = getValue(attrsOf(category), 'text');
      if (category && category[name]) {
        // Nested subcategories are reported as "Parent/Child"
        asList(category[name]).forEach(function (subcategory){
          var child = getValue(attrsOf(subcategory), 'text');
          if (child) found.push(parent ? parent + '/' + child : child);
        });
      } else if (parent) {
        found.push(parent);
      }
    } else if ('media:category' === name) {
      var label = getValue(category);
      if (label) found.push(label);
    }
    return found;
  }

  // Record one <link>: Atom links carry href/rel attributes, RSS links carry text only
  function applyLink (link) {
    var attrs = attrsOf(link);
    if (attrs['href']) { // Atom
      var rel = getValue(attrs, 'rel');
      if (!rel || rel == 'alternate') {
        item.link = attrs['href'];
      } else if (rel == 'replies') {
        item.comments = attrs['href'];
      } else if (rel == 'enclosure') {
        item.enclosures.push({
          url: attrs['href'],
          type: getValue(attrs, 'type'),
          length: getValue(attrs, 'length')
        });
      }
    } else if (Object.keys(attrs).length === 0) { // RSS
      if (!item.link) item.link = getValue(link);
    }
  }

  // Remove duplicates; prefers Array.prototype.unique when the host has installed it
  // (presumably done by ./utils -- confirm), otherwise keeps first occurrences in order
  function dedupe (list) {
    if (typeof list.unique === 'function') return list.unique();
    return list.filter(function (value, index){ return list.indexOf(value) === index; });
  }

  Object.keys(node).forEach(function (name){
    var el = node[name];

    switch (name) {
    case 'title':
      item.title = getValue(el);
      break;
    case 'description':
    case 'summary':
      item.summary = getValue(el);
      if (!item.description) item.description = getValue(el);
      break;
    case 'content':
    case 'content:encoded':
      // Full content takes precedence over a summary as the description
      item.description = getValue(el);
      break;
    case 'pubdate':
    case 'published':
    case 'issued':
    case 'modified':
    case 'updated':
    case 'dc:date':
      var stamp = getValue(el);
      var date = stamp ? new Date(stamp) : null;
      // An unparseable string yields an Invalid Date, which is truthy; skip it as well
      if (!date || isNaN(date.getTime())) break;
      if (item.pubDate === null || name == 'pubdate' || name == 'published' || name == 'issued')
        item.pubDate = date;
      if (item.date === null || name == 'modified' || name == 'updated')
        item.date = date;
      break;
    case 'link':
      asList(el).forEach(applyLink);
      // The link doubles as the guid until an explicit guid/id is seen
      if (!item.guid) item.guid = item.link;
      break;
    case 'guid':
    case 'id':
      item.guid = getValue(el);
      break;
    case 'author':
      // Structured (Atom-style) author first, then the plain element text (RSS);
      // keep any author found earlier rather than overwriting it with null
      item.author = getValue(el.name) || getValue(el.email) || getValue(el.uri) || getValue(el) || item.author;
      break;
    case 'dc:creator':
      item.author = getValue(el);
      break;
    case 'comments':
      item.comments = getValue(el);
      break;
    case 'source':
      if ('rss' == type) {
        item.source['title'] = getValue(el);
        item.source['url'] = getValue(el['@'], 'url');
      } else if ('atom' == type) {
        if (el.title && getValue(el.title))
          item.source['title'] = getValue(el.title);
        if (el.link && getValue(el.link['@'], 'href'))
          item.source['url'] = getValue(el.link['@'], 'href');
      }
      break;
    case 'enclosure':
    case 'media:content':
      asList(el).forEach(function (enc){
        var encAttrs = attrsOf(enc);
        item.enclosures.push({
          url: getValue(encAttrs, 'url'),
          type: getValue(encAttrs, 'type') || getValue(encAttrs, 'medium'),
          length: getValue(encAttrs, 'length') || getValue(encAttrs, 'filesize')
        });
      });
      break;
    case 'enc:enclosure': // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT!
      break;
    case 'category':
    case 'dc:subject':
    case 'itunes:category':
    case 'media:category':
      /* All kinds of categories are handled inside the switch because item.categories
       * is an array, unlike the other properties, and can therefore hold multiple values
       */
      asList(el).forEach(function (category){
        item.categories = item.categories.concat(categoriesOf(category, name));
      });
      break;
    case 'feedburner:origlink':
    case 'pheedo:origlink':
      item.origlink = getValue(el);
      break;
    } // switch end

    // Fill with all native other namespaced properties
    if (name.indexOf('#') !== 0) {
      if (~name.indexOf(':')) item[name] = el;
      else item[type + ':' + name] = el;
    }
  }); // forEach end

  // Fallbacks for properties the main pass left empty
  if (!item.description) {
    if (node['itunes:summary']) item.description = getValue(node['itunes:summary']);
  }
  if (!item.author) {
    if (node['itunes:author']) item.author = getValue(node['itunes:author']);
    else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = getValue(node['itunes:owner']['itunes:name']);
    else if (node['dc:publisher']) item.author = getValue(node['dc:publisher']);
  }
  if (!item.image.url) {
    if (node['itunes:image']) item.image.url = getValue(node['itunes:image']['@'], 'href');
    else if (node['media:thumbnail']) item.image.url = getValue(node['media:thumbnail']['@'], 'url');
    else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = getValue(node['media:content']['media:thumbnail']['@'], 'url');
    else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = getValue(node['media:group']['media:thumbnail']['@'], 'url');
    else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = getValue(node['media:group']['media:content']['media:thumbnail']['@'], 'url');
  }

  if (item.categories.length)
    item.categories = dedupe(item.categories);

  return item;
}
@@ -68,5 +471,13 @@ | ||
* summary {String} | ||
* date {Date} (or null) | ||
* pubDate {Date} (or null) | ||
* link {String} | ||
* origlink {String} | ||
* author {String} | ||
* guid {String} | ||
* comments {String} | ||
* image {Object} | ||
* categories {Array} | ||
* source {Object} | ||
* enclosures {Array} | ||
* meta {Object} | ||
@@ -79,5 +490,13 @@ * Object.keys(meta): | ||
* description {String} | ||
* date {Date} (or null) | ||
* pubDate {Date} (or null) | ||
* link {String} i.e., to the homepage, not the feed | ||
* link {String} i.e., to the website, not the feed | ||
* xmlUrl {String} the canonical URL of the feed, as declared by the feed | ||
* author {String} | ||
* language {String} | ||
* image {Object} | ||
* favicon {String} | ||
* copyright {String} | ||
* generator {String} | ||
* categories {Array} | ||
* | ||
@@ -197,3 +616,3 @@ * Emits a 'warning' event on each XML parser warning | ||
Object.keys(attrs).forEach(function(name){ | ||
if (self.xmlbase.length && (name == 'href' || name == 'src')) { | ||
if (self.xmlbase.length && (name == 'href' || name == 'src' || name == 'uri')) { | ||
// Apply xml:base to these elements as they appear | ||
@@ -220,3 +639,3 @@ // rather than leaving it to the ultimate parser | ||
if (self.in_xhtml) { // We are in an xhtml node | ||
if (self.in_xhtml && self.xhtml['#name'] != n['#name']) { // We are in an xhtml node | ||
// This builds the opening tag, e.g., <div id='foo' class='bar'> | ||
@@ -261,2 +680,8 @@ self.xhtml['#'] += '<'+n['#name']; | ||
if (self.xmlbase.length && (el == 'logo' || el == 'icon')) { // Via atom | ||
// Apply xml:base to these elements as they appear | ||
// rather than leaving it to the ultimate parser | ||
n['#'] = url.resolve(self.xmlbase[0]['#'], n['#']); | ||
} | ||
if (self.xmlbase.length && (el == self.xmlbase[0]['#name'])) { | ||
@@ -268,2 +693,3 @@ void self.xmlbase.shift(); | ||
if (el == self.xhtml['#name']) { // The end of the XHTML | ||
// Add xhtml data to the container element | ||
@@ -273,3 +699,3 @@ n['#'] += self.xhtml['#'].trim(); | ||
for (var key in n) { | ||
if (key != 'attributes' && key != '#') { | ||
if (key != '@' && key != '#') { | ||
delete n[key]; | ||
@@ -298,310 +724,12 @@ } | ||
if (!self.meta.title) { // We haven't yet parsed all the metadata | ||
// Set all the meta keys to null | ||
self.meta.title = self.meta.description = self.meta.pubDate = self.meta.link = self.meta.xmlUrl | ||
= null; | ||
switch(self.meta['#type']){ | ||
case 'atom': | ||
self.meta.title = getValue(self.stack[0].title) | ||
self.meta.description = getValue(self.stack[0].subtitle); | ||
self.meta.pubDate = getValue(self.stack[0].updated) ? new Date(self.stack[0].updated['#']) : null; | ||
;['link', 'atom:link', 'atom10:link'].forEach(function (linkEl){ | ||
if ( self.stack[0][linkEl] ) { | ||
if ( self.stack[0][linkEl].length ) { | ||
self.stack[0][linkEl].forEach(function(link){ | ||
if (link['@'] && link['@']['href']) { | ||
if (link['@']['rel']) { | ||
switch(link['@']['rel']){ | ||
case('alternate'): | ||
self.meta.link = link['@']['href']; | ||
break; | ||
case('self'): | ||
self.meta.xmlUrl = link['@']['href']; | ||
break; | ||
}; | ||
} else { | ||
self.meta.link = link['@']['href']; | ||
} | ||
} | ||
}); | ||
} else { | ||
if (self.stack[0][linkEl]['@'] && self.stack[0][linkEl]['@']['href']) { | ||
if (self.stack[0][linkEl]['@']['rel']) { | ||
switch(self.stack[0][linkEl]['@']['rel']){ | ||
case('alternate'): | ||
self.meta.link = self.stack[0][linkEl]['@']['href']; | ||
break; | ||
case('self'): | ||
self.meta.xmlUrl = self.stack[0][linkEl]['@']['href']; | ||
break; | ||
}; | ||
} else { | ||
self.meta.link = self.stack[0][linkEl]['@']['href'] | ||
} | ||
} | ||
} | ||
} | ||
}); | ||
break; | ||
case 'rss': | ||
if (self.stack[0].title) { | ||
Object.keys(self.stack[0]).forEach(function(el){ | ||
switch(el){ | ||
case('title'): | ||
self.meta.title = getValue(self.stack[0][el]); | ||
break; | ||
case('description'): | ||
self.meta.description = getValue(self.stack[0][el]); | ||
break; | ||
case('pubdate'): | ||
case('lastbuilddate'): | ||
if (self.meta.pubDate === null || el == 'pubdate') | ||
self.meta.pubDate = getValue(self.stack[0][el]) ? new Date(self.stack[0][el]['#']) : null; | ||
break; | ||
case('link'): | ||
if (Array.isArray(self.stack[0][el])) { // How anyone thinks this is valid is beyond me... | ||
self.stack[0][el].forEach(function(link){ | ||
if(!self.meta.link && Object.keys(link['@']).length === 0) | ||
self.meta.link = getValue(link); | ||
}); | ||
} else { | ||
self.meta.link = getValue(self.stack[0][el]); | ||
} | ||
break; | ||
case('atom:link'): | ||
case('atom10:link'): | ||
if (Array.isArray(self.stack[0][el])) { | ||
self.stack[0][el].forEach(function(link){ | ||
if(link['@'] && getValue(link['@'], 'rel') == 'self') | ||
self.meta.xmlUrl = getValue(link['@'], 'href'); | ||
}); | ||
} else if (self.stack[0][el].constructor.name == 'Object' | ||
&& self.stack[0][el]['@'] | ||
&& getValue(self.stack[0][el]['@'], 'rel') == 'self') { | ||
self.meta.xmlUrl = getValue(self.stack[0][el]['@'], 'href'); | ||
} | ||
break; | ||
} | ||
}); | ||
} | ||
break; | ||
case 'rdf': | ||
if (self.stack[0].channel) { | ||
Object.keys(self.stack[0].channel).forEach(function(el){ | ||
switch(el){ | ||
case('title'): | ||
self.meta.title = getValue(self.stack[0].channel[el]); | ||
break; | ||
case('description'): | ||
self.meta.description = getValue(self.stack[0].channel[el]); | ||
break; | ||
case('dc:date'): | ||
self.meta.pubDate = getValue(self.stack[0].channel[el]) ? new Date(self.stack[0].channel[el]['#']) : null; | ||
break; | ||
case('link'): | ||
self.meta.link = getValue(self.stack[0].channel[el]); | ||
break; | ||
case('atom:link'): | ||
case('atom10:link'): | ||
if (Array.isArray(self.stack[0].channel[el])) { | ||
self.stack[0].channel[el].forEach(function(link){ | ||
if(link['@'] && getValue(link['@'], 'rel') == 'self') | ||
self.meta.xmlUrl = getValue(link['@'], 'href'); | ||
}); | ||
} else if (self.stack[0].channel[el].constructor.name == 'Object' | ||
&& self.stack[0].channel[el]['@'] | ||
&& getValue(self.stack[0].channel[el]['@'], 'rel') == 'self') { | ||
self.meta.xmlUrl = getValue(self.stack[0].channel[el]['@'], 'href'); | ||
} | ||
break; | ||
} | ||
}); | ||
} | ||
break; | ||
} | ||
Object.merge(self.meta, handleMeta(self.stack[0], self.meta['#type']), true); | ||
self.emit('meta', self.meta); | ||
} | ||
switch(self.meta['#type']){ | ||
case 'atom': | ||
var item = {}; | ||
item.title = getValue(n.title); | ||
if (getValue(n.content)) | ||
item.description = n.content['#']; | ||
else | ||
item.description = getValue(n.summary); | ||
item.summary = getValue(n.summary); | ||
if (getValue(n.updated)) | ||
item.pubDate = new Date(n.updated['#']); | ||
else | ||
item.pubDate = getValue(n.published) ? new Date(n.published['#']) : null; | ||
item.link = null; | ||
if ( n.link ) { | ||
if ( n.link.length ) { | ||
n.link.forEach(function(link){ | ||
if(link['@'] && (getValue(link['@'], 'rel') == 'alternate' || getValue(link['@'], 'rel') == null)) | ||
item.link = getValue(link['@'], 'href'); | ||
}); | ||
} else if (n.link['@'] && (getValue(n.link['@'], 'rel') == 'alternate' || getValue(n.link['@'], 'rel') == null)) { | ||
item.link = getValue(n.link['@'], 'href'); | ||
} | ||
} | ||
item.guid = getValue(n.id); | ||
item.meta = self.meta; | ||
self.emit('article', item); | ||
self.articles.push(item); | ||
break; | ||
case 'rss': | ||
var item = {}; | ||
item.title = getValue(n.title); | ||
item.description = item.summary = null; | ||
if (getValue(n.description)) | ||
item.description = item.summary = n.description['#']; | ||
if (getValue(n['content:encoded'])) | ||
item.description = n['content:encoded']['#']; | ||
item.pubDate = getValue(n.pubdate) ? new Date(n.pubdate['#']) : null; | ||
item.link = getValue(n.link); | ||
item.guid = getValue(n.guid); | ||
item.meta = self.meta; | ||
self.emit('article', item); | ||
self.articles.push(item); | ||
break; | ||
case 'rdf': | ||
var item = {}; | ||
item.title = getValue(n.title); | ||
item.description = item.summary = null; | ||
if (getValue(n.description)) | ||
item.description = item.summary = n.description['#']; | ||
if (getValue(n['content:encoded'])) | ||
item.description = n['content:encoded']['#']; | ||
item.pubDate = getValue(n['dc:date']) ? new Date(n['dc:date']['#']) : null; | ||
item.link = item.guid = getValue(n.link); | ||
if ( n['@'] ) | ||
item.guid = getValue(n['@'], 'rdf:about'); | ||
item.meta = self.meta; | ||
self.emit('article', item); | ||
self.articles.push(item); | ||
break; | ||
} | ||
item = handleItem(n, self.meta['#type']); | ||
item.meta = self.meta; | ||
if (self.meta.author && !item.author) item.author = self.meta.author; | ||
self.emit('article', item); | ||
self.articles.push(item); | ||
} else if ((el == 'channel' || el == 'feed') && !self.meta.title) { // We haven't yet parsed all the metadata | ||
// Set all the meta keys to null | ||
self.meta.title = self.meta.description = self.meta.pubDate = self.meta.link = self.meta.xmlUrl | ||
= null; | ||
switch(self.meta['#type']){ | ||
case 'atom': | ||
self.meta.title = getValue(n.title) | ||
self.meta.description = getValue(n.subtitle); | ||
self.meta.pubDate = getValue(n.updated) ? new Date(n.updated['#']) : null; | ||
;['link', 'atom:link', 'atom10:link'].forEach(function (linkEl){ | ||
if ( n[linkEl] ) { | ||
if ( n[linkEl].length ) { | ||
n[linkEl].forEach(function(link){ | ||
if (link['@'] && link['@']['href']) { | ||
if (link['@']['rel']) { | ||
switch(link['@']['rel']){ | ||
case('alternate'): | ||
self.meta.link = link['@']['href']; | ||
break; | ||
case('self'): | ||
self.meta.xmlUrl = link['@']['href']; | ||
break; | ||
}; | ||
} else { | ||
self.meta.link = link['@']['href']; | ||
} | ||
} | ||
}); | ||
} else { | ||
if (n[linkEl]['@'] && n[linkEl]['@']['href']) { | ||
if (n[linkEl]['@']['rel']) { | ||
switch(n[linkEl]['@']['rel']){ | ||
case('alternate'): | ||
self.meta.link = n[linkEl]['@']['href']; | ||
break; | ||
case('self'): | ||
self.meta.xmlUrl = n[linkEl]['@']['href']; | ||
break; | ||
}; | ||
} else { | ||
self.meta.link = n[linkEl]['@']['href']; | ||
} | ||
} | ||
} | ||
} | ||
}); | ||
break; | ||
case 'rss': | ||
if (n.title) { | ||
Object.keys(n).forEach(function(el){ | ||
switch(el){ | ||
case('title'): | ||
self.meta.title = getValue(n[el]); | ||
break; | ||
case('description'): | ||
self.meta.description = getValue(n[el]); | ||
break; | ||
case('pubdate'): | ||
case('lastbuilddate'): | ||
if (self.meta.pubDate === null || el == 'pubdate') | ||
self.meta.pubDate = getValue(n[el]) ? new Date(n[el]['#']) : null; | ||
break; | ||
case('link'): | ||
if (Array.isArray(n[el])) { // How anyone thinks this is valid is beyond me... | ||
n[el].forEach(function(link){ | ||
if(!self.meta.link && Object.keys(link['@']).length === 0) | ||
self.meta.link = getValue(link); | ||
}); | ||
} else { | ||
self.meta.link = getValue(n[el]); | ||
} | ||
break; | ||
case('atom:link'): | ||
case('atom10:link'): | ||
if (Array.isArray(n[el])) { | ||
n[el].forEach(function(link){ | ||
if(link['@'] && getValue(link['@'], 'rel') == 'self') | ||
self.meta.xmlUrl = getValue(link['@'], 'href'); | ||
}); | ||
} else if (n[el].constructor.name == 'Object' | ||
&& n[el]['@'] | ||
&& getValue(n[el]['@'], 'rel') == 'self') { | ||
self.meta.xmlUrl = getValue(n[el]['@'], 'href'); | ||
} | ||
break; | ||
} | ||
}); | ||
} | ||
break; | ||
case 'rdf': | ||
if (n.title) { | ||
Object.keys(n).forEach(function(el){ | ||
switch(el){ | ||
case('title'): | ||
self.meta.title = getValue(n[el]); | ||
break; | ||
case('description'): | ||
self.meta.description = getValue(n[el]); | ||
break; | ||
case('dc:date'): | ||
self.meta.pubDate = getValue(n[el]) ? new Date(n[el]['#']) : null; | ||
break; | ||
case('link'): | ||
self.meta.link = getValue(n[el]); | ||
break; | ||
case('atom:link'): | ||
case('atom10:link'): | ||
if (Array.isArray(n[el])) { | ||
n[el].forEach(function(link){ | ||
if(link['@'] && getValue(link['@'], 'rel') == 'self') | ||
self.meta.xmlUrl = getValue(link['@'], 'href'); | ||
}); | ||
} else if (n[el].constructor.name == 'Object' | ||
&& n[el]['@'] | ||
&& getValue(n[el]['@'], 'rel') == 'self') { | ||
self.meta.xmlUrl = getValue(n[el]['@'], 'href'); | ||
} | ||
break; | ||
} | ||
}); | ||
} | ||
break; | ||
} | ||
Object.merge(self.meta, handleMeta(n, self.meta['#type']), true); | ||
self.emit('meta', self.meta); | ||
@@ -608,0 +736,0 @@ } |
@@ -5,3 +5,3 @@ { | ||
"description": "Robust RSS, Atom, and RDF feed parsing using sax js", | ||
"version": "0.4.6", | ||
"version": "0.9.0", | ||
"repository": { | ||
@@ -8,0 +8,0 @@ "type": "git", |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: This package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
840
116
41127
11
1