feedparser
Advanced tools
Comparing version 0.9.12 to 0.9.13
807
main.js
@@ -21,164 +21,172 @@ /********************************************************************** | ||
function handleMeta (node, type) { | ||
function handleMeta (node, type, options) { | ||
if (!type || !node) return {}; | ||
var meta = {}; | ||
['title','description','date', 'pubdate', 'pubDate','link', 'xmlurl', 'xmlUrl','author','language','favicon','copyright','generator'].forEach(function (property){ | ||
meta[property] = null; | ||
}); | ||
meta.image = {}; | ||
meta.categories = []; | ||
var meta = {} | ||
, normalize = !options || (options && options.normalize) | ||
; | ||
if (normalize) { | ||
['title','description','date', 'pubdate', 'pubDate','link', 'xmlurl', 'xmlUrl','author','language','favicon','copyright','generator'].forEach(function (property){ | ||
meta[property] = null; | ||
}); | ||
meta.image = {}; | ||
meta.categories = []; | ||
} | ||
Object.keys(node).forEach(function(name){ | ||
var el = node[name]; | ||
switch(name){ | ||
case('title'): | ||
meta.title = utils.get(el); | ||
break; | ||
case('description'): | ||
case('subtitle'): | ||
meta.description = utils.get(el); | ||
break; | ||
case('pubdate'): | ||
case('lastbuilddate'): | ||
case('published'): | ||
case('modified'): | ||
case('updated'): | ||
case('dc:date'): | ||
var date = utils.get(el) ? new Date(el['#']) : null; | ||
if (!date) break; | ||
if (meta.pubdate === null || name == 'pubdate' || name == 'published') | ||
meta.pubdate = meta.pubDate = date; | ||
if (meta.date === null || name == 'lastbuilddate' || name == 'modified' || name == 'updated') | ||
meta.date = date; | ||
break; | ||
case('link'): | ||
case('atom:link'): | ||
case('atom10:link'): | ||
if (Array.isArray(el)) { | ||
el.forEach(function (link){ | ||
if (link['@']['href']) { // Atom | ||
if (utils.get(link['@'], 'rel')) { | ||
if (link['@']['rel'] == 'alternate') meta.link = link['@']['href']; | ||
else if (link['@']['rel'] == 'self') meta.xmlurl = meta.xmlUrl = link['@']['href']; | ||
if (normalize) { | ||
switch(name){ | ||
case('title'): | ||
meta.title = utils.get(el); | ||
break; | ||
case('description'): | ||
case('subtitle'): | ||
meta.description = utils.get(el); | ||
break; | ||
case('pubdate'): | ||
case('lastbuilddate'): | ||
case('published'): | ||
case('modified'): | ||
case('updated'): | ||
case('dc:date'): | ||
var date = utils.get(el) ? new Date(el['#']) : null; | ||
if (!date) break; | ||
if (meta.pubdate === null || name == 'pubdate' || name == 'published') | ||
meta.pubdate = meta.pubDate = date; | ||
if (meta.date === null || name == 'lastbuilddate' || name == 'modified' || name == 'updated') | ||
meta.date = date; | ||
break; | ||
case('link'): | ||
case('atom:link'): | ||
case('atom10:link'): | ||
if (Array.isArray(el)) { | ||
el.forEach(function (link){ | ||
if (link['@']['href']) { // Atom | ||
if (utils.get(link['@'], 'rel')) { | ||
if (link['@']['rel'] == 'alternate') meta.link = link['@']['href']; | ||
else if (link['@']['rel'] == 'self') meta.xmlurl = meta.xmlUrl = link['@']['href']; | ||
} else { | ||
meta.link = link['@']['href']; | ||
} | ||
} else if (Object.keys(link['@']).length === 0) { // RSS | ||
if (!meta.link) meta.link = utils.get(link); | ||
} | ||
}); | ||
} else { | ||
if (el['@']['href']) { // Atom | ||
if (utils.get(el['@'], 'rel')) { | ||
if (el['@']['rel'] == 'alternate') meta.link = el['@']['href']; | ||
else if (el['@']['rel'] == 'self') meta.xmlurl = meta.xmlUrl = el['@']['href']; | ||
} else { | ||
meta.link = link['@']['href']; | ||
meta.link = el['@']['href']; | ||
} | ||
} else if (Object.keys(link['@']).length === 0) { // RSS | ||
if (!meta.link) meta.link = utils.get(link); | ||
} else if (Object.keys(el['@']).length === 0) { // RSS | ||
if (!meta.link) meta.link = utils.get(el); | ||
} | ||
}); | ||
} else { | ||
if (el['@']['href']) { // Atom | ||
if (utils.get(el['@'], 'rel')) { | ||
if (el['@']['rel'] == 'alternate') meta.link = el['@']['href']; | ||
else if (el['@']['rel'] == 'self') meta.xmlurl = meta.xmlUrl = el['@']['href']; | ||
} else { | ||
meta.link = el['@']['href']; | ||
} | ||
} else if (Object.keys(el['@']).length === 0) { // RSS | ||
if (!meta.link) meta.link = utils.get(el); | ||
} | ||
} | ||
break; | ||
case('managingeditor'): | ||
case('webmaster'): | ||
case('author'): | ||
if (meta.author === null || name == 'managingeditor') | ||
meta.author = utils.get(el); | ||
if (name == 'author') | ||
meta.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri); | ||
break; | ||
case('language'): | ||
meta.language = utils.get(el); | ||
break; | ||
case('image'): | ||
case('logo'): | ||
if (el.url) | ||
meta.image.url = utils.get(el.url); | ||
if (el.title) | ||
meta.image.title = utils.get(el.title); | ||
else meta.image.url = utils.get(el); | ||
break; | ||
case('icon'): | ||
meta.favicon = utils.get(el); | ||
break; | ||
case('copyright'): | ||
case('rights'): | ||
case('dc:rights'): | ||
meta.copyright = utils.get(el); | ||
break; | ||
case('generator'): | ||
meta.generator = utils.get(el); | ||
if (utils.get(el['@'], 'version')) | ||
meta.generator += (meta.generator ? ' ' : '') + 'v' + el['@'].version; | ||
if (utils.get(el['@'], 'uri')) | ||
meta.generator += meta.generator ? ' (' + el['@'].uri + ')' : el['@'].uri; | ||
break; | ||
case('category'): | ||
case('dc:subject'): | ||
case('itunes:category'): | ||
case('media:category'): | ||
/* We handle all the kinds of categories within the switch loop because meta.categories | ||
* is an array, unlike the other properties, and therefore can handle multiple values | ||
*/ | ||
if (Array.isArray(el)) { | ||
el.forEach(function (category){ | ||
break; | ||
case('managingeditor'): | ||
case('webmaster'): | ||
case('author'): | ||
if (meta.author === null || name == 'managingeditor') | ||
meta.author = utils.get(el); | ||
if (name == 'author') | ||
meta.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri); | ||
break; | ||
case('language'): | ||
meta.language = utils.get(el); | ||
break; | ||
case('image'): | ||
case('logo'): | ||
if (el.url) | ||
meta.image.url = utils.get(el.url); | ||
if (el.title) | ||
meta.image.title = utils.get(el.title); | ||
else meta.image.url = utils.get(el); | ||
break; | ||
case('icon'): | ||
meta.favicon = utils.get(el); | ||
break; | ||
case('copyright'): | ||
case('rights'): | ||
case('dc:rights'): | ||
meta.copyright = utils.get(el); | ||
break; | ||
case('generator'): | ||
meta.generator = utils.get(el); | ||
if (utils.get(el['@'], 'version')) | ||
meta.generator += (meta.generator ? ' ' : '') + 'v' + el['@'].version; | ||
if (utils.get(el['@'], 'uri')) | ||
meta.generator += meta.generator ? ' (' + el['@'].uri + ')' : el['@'].uri; | ||
break; | ||
case('category'): | ||
case('dc:subject'): | ||
case('itunes:category'): | ||
case('media:category'): | ||
/* We handle all the kinds of categories within the switch loop because meta.categories | ||
* is an array, unlike the other properties, and therefore can handle multiple values | ||
*/ | ||
var _category = '' | ||
, _categories = [] | ||
; | ||
if (Array.isArray(el)) { | ||
el.forEach(function (category){ | ||
if ('category' == name && 'atom' == type) { | ||
if (category['@'] && utils.get(category['@'], 'term')) meta.categories.push(utils.get(category['@'], 'term')); | ||
} else if ('category' == name && utils.get(category) && 'rss' == type) { | ||
_categories = utils.get(category).split(',').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) meta.categories = meta.categories.concat(_categories); | ||
} else if ('dc:subject' == name && utils.get(category)) { | ||
_categories = utils.get(category).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) meta.categories = meta.categories.concat(_categories); | ||
} else if ('itunes:category' == name) { | ||
if (category['@'] && utils.get(category['@'], 'text')) _category = utils.get(category['@'], 'text'); | ||
if (category[name]) { | ||
if (Array.isArray(category[name])) { | ||
category[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) meta.categories.push(_category + '/' + utils.get(subcategory['@'], 'text')); | ||
}); | ||
} else { | ||
if (category[name]['@'] && utils.get(category[name]['@'], 'text')) | ||
meta.categories.push(_category + '/' + utils.get(category[name]['@'], 'text')); | ||
} | ||
} else { | ||
meta.categories.push(_category); | ||
} | ||
} else if ('media:category' == name) { | ||
meta.categories.push(utils.get(category)); | ||
} | ||
}); | ||
} else { | ||
if ('category' == name && 'atom' == type) { | ||
if (category['@'] && utils.get(category['@'], 'term')) meta.categories.push(utils.get(category['@'], 'term')); | ||
} else if ('category' == name && utils.get(category) && 'rss' == type) { | ||
var categories = utils.get(category).split(',').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) meta.categories = meta.categories.concat(categories); | ||
} else if ('dc:subject' == name && utils.get(category)) { | ||
var categories = utils.get(category).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) meta.categories = meta.categories.concat(categories); | ||
if (utils.get(el['@'], 'term')) meta.categories.push(utils.get(el['@'], 'term')); | ||
} else if ('category' == name && utils.get(el) && 'rss' == type) { | ||
_categories = utils.get(el).split(',').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) meta.categories = meta.categories.concat(_categories); | ||
} else if ('dc:subject' == name && utils.get(el)) { | ||
_categories = utils.get(el).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) meta.categories = meta.categories.concat(_categories); | ||
} else if ('itunes:category' == name) { | ||
var cat; | ||
if (category['@'] && utils.get(category['@'], 'text')) cat = utils.get(category['@'], 'text'); | ||
if (category[name]) { | ||
if (Array.isArray(category[name])) { | ||
category[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) meta.categories.push(cat + '/' + utils.get(subcategory['@'], 'text')); | ||
if (el['@'] && utils.get(el['@'], 'text')) _category = utils.get(el['@'], 'text'); | ||
if (el[name]) { | ||
if (Array.isArray(el[name])) { | ||
el[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) meta.categories.push(_category + '/' + utils.get(subcategory['@'], 'text')); | ||
}); | ||
} else { | ||
if (category[name]['@'] && utils.get(category[name]['@'], 'text')) | ||
meta.categories.push(cat + '/' + utils.get(category[name]['@'], 'text')); | ||
if (el[name]['@'] && utils.get(el[name]['@'], 'text')) | ||
meta.categories.push(_category + '/' + utils.get(el[name]['@'], 'text')); | ||
} | ||
} else { | ||
meta.categories.push(cat); | ||
meta.categories.push(_category); | ||
} | ||
} else if ('media:category' == name) { | ||
meta.categories.push(utils.get(category)); | ||
meta.categories.push(utils.get(el)); | ||
} | ||
}); | ||
} else { | ||
if ('category' == name && 'atom' == type) { | ||
if (utils.get(el['@'], 'term')) meta.categories.push(utils.get(el['@'], 'term')); | ||
} else if ('category' == name && utils.get(el) && 'rss' == type) { | ||
var categories = utils.get(el).split(',').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) meta.categories = meta.categories.concat(categories); | ||
} else if ('dc:subject' == name && utils.get(el)) { | ||
var categories = utils.get(el).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) meta.categories = meta.categories.concat(categories); | ||
} else if ('itunes:category' == name) { | ||
var cat; | ||
if (el['@'] && utils.get(el['@'], 'text')) cat = utils.get(el['@'], 'text'); | ||
if (el[name]) { | ||
if (Array.isArray(el[name])) { | ||
el[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) meta.categories.push(cat + '/' + utils.get(subcategory['@'], 'text')); | ||
}); | ||
} else { | ||
if (el[name]['@'] && utils.get(el[name]['@'], 'text')) | ||
meta.categories.push(cat + '/' + utils.get(el[name]['@'], 'text')); | ||
} | ||
} else { | ||
meta.categories.push(cat); | ||
} | ||
} else if ('media:category' == name) { | ||
meta.categories.push(utils.get(el)); | ||
} | ||
} | ||
break; | ||
} // switch end | ||
break; | ||
} // switch end | ||
} | ||
// Fill with all native other namespaced properties | ||
@@ -190,246 +198,256 @@ if (name.indexOf('#') !== 0) { | ||
}); // forEach end | ||
if (!meta.description) { | ||
if (node['itunes:summary']) meta.description = utils.get(node['itunes:summary']); | ||
else if (node['tagline']) meta.description = utils.get(node['tagline']); | ||
} | ||
if (!meta.author) { | ||
if (node['itunes:author']) meta.author = utils.get(node['itunes:author']); | ||
else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) meta.author = utils.get(node['itunes:owner']['itunes:name']); | ||
else if (node['dc:creator']) meta.author = utils.get(node['dc:creator']); | ||
else if (node['dc:publisher']) meta.author = utils.get(node['dc:publisher']); | ||
} | ||
if (!meta.language) { | ||
if (node['@'] && node['@']['xml:lang']) meta.language = utils.get(node['@'], 'xml:lang'); | ||
else if (node['dc:language']) meta.language = utils.get(node['dc:language']); | ||
} | ||
if (!meta.image.url) { | ||
if (node['itunes:image']) meta.image.url = utils.get(node['itunes:image']['@'], 'href'); | ||
else if (node['media:thumbnail']) meta.image.url = utils.get(node['media:thumbnail']['@'], 'url'); | ||
} | ||
if (!meta.copyright) { | ||
if (node['media:copyright']) meta.copyright = utils.get(node['media:copyright']); | ||
else if (node['dc:rights']) meta.copyright = utils.get(node['dc:rights']); | ||
else if (node['creativecommons:license']) meta.copyright = utils.get(node['creativecommons:license']); | ||
else if (node['cc:license']) { | ||
if (Array.isArray(node['cc:license']) && node['cc:license'][0]['@'] && node['cc:license'][0]['@']['rdf:resource']) { | ||
meta.copyright = utils.get(node['cc:license'][0]['@'], 'rdf:resource'); | ||
} else if (node['cc:license']['@'] && node['cc:license']['@']['rdf:resource']) { | ||
meta.copyright = utils.get(node['cc:license']['@'], 'rdf:resource'); | ||
if (normalize) { | ||
if (!meta.description) { | ||
if (node['itunes:summary']) meta.description = utils.get(node['itunes:summary']); | ||
else if (node['tagline']) meta.description = utils.get(node['tagline']); | ||
} | ||
if (!meta.author) { | ||
if (node['itunes:author']) meta.author = utils.get(node['itunes:author']); | ||
else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) meta.author = utils.get(node['itunes:owner']['itunes:name']); | ||
else if (node['dc:creator']) meta.author = utils.get(node['dc:creator']); | ||
else if (node['dc:publisher']) meta.author = utils.get(node['dc:publisher']); | ||
} | ||
if (!meta.language) { | ||
if (node['@'] && node['@']['xml:lang']) meta.language = utils.get(node['@'], 'xml:lang'); | ||
else if (node['dc:language']) meta.language = utils.get(node['dc:language']); | ||
} | ||
if (!meta.image.url) { | ||
if (node['itunes:image']) meta.image.url = utils.get(node['itunes:image']['@'], 'href'); | ||
else if (node['media:thumbnail']) meta.image.url = utils.get(node['media:thumbnail']['@'], 'url'); | ||
} | ||
if (!meta.copyright) { | ||
if (node['media:copyright']) meta.copyright = utils.get(node['media:copyright']); | ||
else if (node['dc:rights']) meta.copyright = utils.get(node['dc:rights']); | ||
else if (node['creativecommons:license']) meta.copyright = utils.get(node['creativecommons:license']); | ||
else if (node['cc:license']) { | ||
if (Array.isArray(node['cc:license']) && node['cc:license'][0]['@'] && node['cc:license'][0]['@']['rdf:resource']) { | ||
meta.copyright = utils.get(node['cc:license'][0]['@'], 'rdf:resource'); | ||
} else if (node['cc:license']['@'] && node['cc:license']['@']['rdf:resource']) { | ||
meta.copyright = utils.get(node['cc:license']['@'], 'rdf:resource'); | ||
} | ||
} | ||
} | ||
} | ||
if (!meta.generator) { | ||
if (node['admin:generatoragent']) { | ||
if (Array.isArray(node['admin:generatoragent']) && node['admin:generatoragent'][0]['@'] && node['admin:generatoragent'][0]['@']['rdf:resource']) { | ||
meta.generator = utils.get(node['admin:generatoragent'][0]['@'], 'rdf:resource'); | ||
} else if (node['admin:generatoragent']['@'] && node['admin:generatoragent']['@']['rdf:resource']) { | ||
meta.generator = utils.get(node['admin:generatoragent']['@'], 'rdf:resource'); | ||
if (!meta.generator) { | ||
if (node['admin:generatoragent']) { | ||
if (Array.isArray(node['admin:generatoragent']) && node['admin:generatoragent'][0]['@'] && node['admin:generatoragent'][0]['@']['rdf:resource']) { | ||
meta.generator = utils.get(node['admin:generatoragent'][0]['@'], 'rdf:resource'); | ||
} else if (node['admin:generatoragent']['@'] && node['admin:generatoragent']['@']['rdf:resource']) { | ||
meta.generator = utils.get(node['admin:generatoragent']['@'], 'rdf:resource'); | ||
} | ||
} | ||
} | ||
if (meta.categories.length) | ||
meta.categories = utils.unique(meta.categories); | ||
} | ||
if (meta.categories.length) | ||
meta.categories = utils.unique(meta.categories); | ||
return meta; | ||
} | ||
function handleItem (node, type){ | ||
function handleItem (node, type, options){ | ||
if (!type || !node) return {}; | ||
var item = {}; | ||
['title','description','summary','date','pubdate','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){ | ||
item[property] = null; | ||
}); | ||
item.image = {}; | ||
item.source = {}; | ||
item.categories = []; | ||
item.enclosures = []; | ||
var item = {} | ||
, normalize = !options || (options && options.normalize) | ||
; | ||
if (normalize) { | ||
['title','description','summary','date','pubdate','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){ | ||
item[property] = null; | ||
}); | ||
item.image = {}; | ||
item.source = {}; | ||
item.categories = []; | ||
item.enclosures = []; | ||
} | ||
Object.keys(node).forEach(function(name){ | ||
var el = node[name]; | ||
switch(name){ | ||
case('title'): | ||
item.title = utils.get(el); | ||
break; | ||
case('description'): | ||
case('summary'): | ||
item.summary = utils.get(el); | ||
if (!item.description) item.description = utils.get(el); | ||
break; | ||
case('content'): | ||
case('content:encoded'): | ||
item.description = utils.get(el); | ||
break; | ||
case('pubdate'): | ||
case('published'): | ||
case('issued'): | ||
case('modified'): | ||
case('updated'): | ||
case('dc:date'): | ||
var date = utils.get(el) ? new Date(el['#']) : null; | ||
if (!date) break; | ||
if (item.pubdate === null || name == 'pubdate' || name == 'published' || name == 'issued') | ||
item.pubdate = item.pubDate = date; | ||
if (item.date === null || name == 'modified' || name == 'updated') | ||
item.date = date; | ||
break; | ||
case('link'): | ||
if (Array.isArray(el)) { | ||
el.forEach(function (link){ | ||
if (link['@']['href']) { // Atom | ||
if (utils.get(link['@'], 'rel')) { | ||
if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href']; | ||
if (link['@']['rel'] == 'alternate') item.link = link['@']['href']; | ||
if (link['@']['rel'] == 'replies') item.comments = link['@']['href']; | ||
if (link['@']['rel'] == 'enclosure') { | ||
if (normalize) { | ||
switch(name){ | ||
case('title'): | ||
item.title = utils.get(el); | ||
break; | ||
case('description'): | ||
case('summary'): | ||
item.summary = utils.get(el); | ||
if (!item.description) item.description = utils.get(el); | ||
break; | ||
case('content'): | ||
case('content:encoded'): | ||
item.description = utils.get(el); | ||
break; | ||
case('pubdate'): | ||
case('published'): | ||
case('issued'): | ||
case('modified'): | ||
case('updated'): | ||
case('dc:date'): | ||
var date = utils.get(el) ? new Date(el['#']) : null; | ||
if (!date) break; | ||
if (item.pubdate === null || name == 'pubdate' || name == 'published' || name == 'issued') | ||
item.pubdate = item.pubDate = date; | ||
if (item.date === null || name == 'modified' || name == 'updated') | ||
item.date = date; | ||
break; | ||
case('link'): | ||
if (Array.isArray(el)) { | ||
el.forEach(function (link){ | ||
if (link['@']['href']) { // Atom | ||
if (utils.get(link['@'], 'rel')) { | ||
if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href']; | ||
if (link['@']['rel'] == 'alternate') item.link = link['@']['href']; | ||
if (link['@']['rel'] == 'replies') item.comments = link['@']['href']; | ||
if (link['@']['rel'] == 'enclosure') { | ||
var enclosure = {}; | ||
enclosure.url = link['@']['href']; | ||
enclosure.type = utils.get(link['@'], 'type'); | ||
enclosure.length = utils.get(link['@'], 'length'); | ||
item.enclosures.push(enclosure); | ||
} | ||
} else { | ||
item.link = link['@']['href']; | ||
} | ||
} else if (Object.keys(link['@']).length === 0) { // RSS | ||
if (!item.link) item.link = utils.get(link); | ||
} | ||
}); | ||
} else { | ||
if (el['@']['href']) { // Atom | ||
if (utils.get(el['@'], 'rel')) { | ||
if (el['@']['rel'] == 'canonical') item.origlink = el['@']['href']; | ||
if (el['@']['rel'] == 'alternate') item.link = el['@']['href']; | ||
if (el['@']['rel'] == 'replies') item.comments = el['@']['href']; | ||
if (el['@']['rel'] == 'enclosure') { | ||
var enclosure = {}; | ||
enclosure.url = link['@']['href']; | ||
enclosure.type = utils.get(link['@'], 'type'); | ||
enclosure.length = utils.get(link['@'], 'length'); | ||
enclosure.url = el['@']['href']; | ||
enclosure.type = utils.get(el['@'], 'type'); | ||
enclosure.length = utils.get(el['@'], 'length'); | ||
item.enclosures.push(enclosure); | ||
} | ||
} else { | ||
item.link = link['@']['href']; | ||
item.link = el['@']['href']; | ||
} | ||
} else if (Object.keys(link['@']).length === 0) { // RSS | ||
if (!item.link) item.link = utils.get(link); | ||
} else if (Object.keys(el['@']).length === 0) { // RSS | ||
if (!item.link) item.link = utils.get(el); | ||
} | ||
}); | ||
} else { | ||
if (el['@']['href']) { // Atom | ||
if (utils.get(el['@'], 'rel')) { | ||
if (el['@']['rel'] == 'canonical') item.origlink = el['@']['href']; | ||
if (el['@']['rel'] == 'alternate') item.link = el['@']['href']; | ||
if (el['@']['rel'] == 'replies') item.comments = el['@']['href']; | ||
if (el['@']['rel'] == 'enclosure') { | ||
var enclosure = {}; | ||
enclosure.url = el['@']['href']; | ||
enclosure.type = utils.get(el['@'], 'type'); | ||
enclosure.length = utils.get(el['@'], 'length'); | ||
item.enclosures.push(enclosure); | ||
} | ||
if (!item.guid) item.guid = item.link; | ||
break; | ||
case('guid'): | ||
case('id'): | ||
item.guid = utils.get(el); | ||
break; | ||
case('author'): | ||
item.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri); | ||
break; | ||
case('dc:creator'): | ||
item.author = utils.get(el); | ||
break; | ||
case('comments'): | ||
item.comments = utils.get(el); | ||
break; | ||
case('source'): | ||
if ('rss' == type) { | ||
item.source['title'] = utils.get(el); | ||
item.source['url'] = utils.get(el['@'], 'url'); | ||
} else if ('atom' == type) { | ||
if (el.title && utils.get(el.title)) | ||
item.source['title'] = utils.get(el.title); | ||
if (el.link && utils.get(el.link['@'], 'href')) | ||
item.source['url'] = utils.get(el.link['@'], 'href'); | ||
} | ||
break; | ||
case('enclosure'): | ||
case('media:content'): | ||
var _enclosure = {}; | ||
if (Array.isArray(el)) { | ||
el.forEach(function (enc){ | ||
_enclosure.url = utils.get(enc['@'], 'url'); | ||
_enclosure.type = utils.get(enc['@'], 'type') || utils.get(enc['@'], 'medium'); | ||
_enclosure.length = utils.get(enc['@'], 'length') || utils.get(enc['@'], 'filesize'); | ||
item.enclosures.push(_enclosure); | ||
}); | ||
} else { | ||
_enclosure.url = utils.get(el['@'], 'url'); | ||
_enclosure.type = utils.get(el['@'], 'type') || utils.get(el['@'], 'medium'); | ||
_enclosure.length = utils.get(el['@'], 'length') || utils.get(el['@'], 'filesize'); | ||
item.enclosures.push(_enclosure); | ||
} | ||
break; | ||
case('enc:enclosure'): // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT! | ||
break; | ||
case('category'): | ||
case('dc:subject'): | ||
case('itunes:category'): | ||
case('media:category'): | ||
/* We handle all the kinds of categories within the switch loop because item.categories | ||
* is an array, unlike the other properties, and therefore can handle multiple values | ||
*/ | ||
var _category = '' | ||
, _categories = [] | ||
; | ||
if (Array.isArray(el)) { | ||
el.forEach(function (category){ | ||
if ('category' == name && 'atom' == type) { | ||
if (category['@'] && utils.get(category['@'], 'term')) item.categories.push(utils.get(category['@'], 'term')); | ||
} else if ('category' == name && utils.get(category) && 'rss' == type) { | ||
_categories = utils.get(category).split(',').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) item.categories = item.categories.concat(_categories); | ||
} else if ('dc:subject' == name && utils.get(category)) { | ||
_categories = utils.get(category).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) item.categories = item.categories.concat(_categories); | ||
} else if ('itunes:category' == name) { | ||
if (category['@'] && utils.get(category['@'], 'text')) _category = utils.get(category['@'], 'text'); | ||
if (category[name]) { | ||
if (Array.isArray(category[name])) { | ||
category[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + utils.get(subcategory['@'], 'text')); | ||
}); | ||
} else { | ||
if (category[name]['@'] && utils.get(category[name]['@'], 'text')) | ||
item.categories.push(_category + '/' + utils.get(category[name]['@'], 'text')); | ||
} | ||
} else { | ||
item.categories.push(_category); | ||
} | ||
} else if ('media:category' == name) { | ||
item.categories.push(utils.get(category)); | ||
} | ||
} else { | ||
item.link = el['@']['href']; | ||
} | ||
} else if (Object.keys(el['@']).length === 0) { // RSS | ||
if (!item.link) item.link = utils.get(el); | ||
} | ||
} | ||
if (!item.guid) item.guid = item.link; | ||
break; | ||
case('guid'): | ||
case('id'): | ||
item.guid = utils.get(el); | ||
break; | ||
case('author'): | ||
item.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri); | ||
break; | ||
case('dc:creator'): | ||
item.author = utils.get(el); | ||
break; | ||
case('comments'): | ||
item.comments = utils.get(el); | ||
break; | ||
case('source'): | ||
if ('rss' == type) { | ||
item.source['title'] = utils.get(el); | ||
item.source['url'] = utils.get(el['@'], 'url'); | ||
} else if ('atom' == type) { | ||
if (el.title && utils.get(el.title)) | ||
item.source['title'] = utils.get(el.title); | ||
if (el.link && utils.get(el.link['@'], 'href')) | ||
item.source['url'] = utils.get(el.link['@'], 'href'); | ||
} | ||
break; | ||
case('enclosure'): | ||
case('media:content'): | ||
if (Array.isArray(el)) { | ||
el.forEach(function (enc){ | ||
var enclosure = {}; | ||
enclosure.url = utils.get(enc['@'], 'url'); | ||
enclosure.type = utils.get(enc['@'], 'type') || utils.get(enc['@'], 'medium'); | ||
enclosure.length = utils.get(enc['@'], 'length') || utils.get(enc['@'], 'filesize'); | ||
item.enclosures.push(enclosure); | ||
}); | ||
} else { | ||
var enclosure = {}; | ||
enclosure.url = utils.get(el['@'], 'url'); | ||
enclosure.type = utils.get(el['@'], 'type') || utils.get(el['@'], 'medium'); | ||
enclosure.length = utils.get(el['@'], 'length') || utils.get(el['@'], 'filesize'); | ||
item.enclosures.push(enclosure); | ||
} | ||
break; | ||
case('enc:enclosure'): // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT! | ||
break; | ||
case('category'): | ||
case('dc:subject'): | ||
case('itunes:category'): | ||
case('media:category'): | ||
/* We handle all the kinds of categories within the switch loop because item.categories | ||
* is an array, unlike the other properties, and therefore can handle multiple values | ||
*/ | ||
if (Array.isArray(el)) { | ||
el.forEach(function (category){ | ||
}); | ||
} else { | ||
if ('category' == name && 'atom' == type) { | ||
if (category['@'] && utils.get(category['@'], 'term')) item.categories.push(utils.get(category['@'], 'term')); | ||
} else if ('category' == name && utils.get(category) && 'rss' == type) { | ||
var categories = utils.get(category).split(',').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) item.categories = item.categories.concat(categories); | ||
} else if ('dc:subject' == name && utils.get(category)) { | ||
var categories = utils.get(category).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) item.categories = item.categories.concat(categories); | ||
if (utils.get(el['@'], 'term')) item.categories.push(utils.get(el['@'], 'term')); | ||
} else if ('category' == name && utils.get(el) && 'rss' == type) { | ||
_categories = utils.get(el).split(',').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) item.categories = item.categories.concat(_categories); | ||
} else if ('dc:subject' == name && utils.get(el)) { | ||
_categories = utils.get(el).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (_categories.length) item.categories = item.categories.concat(_categories); | ||
} else if ('itunes:category' == name) { | ||
var cat; | ||
if (category['@'] && utils.get(category['@'], 'text')) cat = utils.get(category['@'], 'text'); | ||
if (category[name]) { | ||
if (Array.isArray(category[name])) { | ||
category[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(cat + '/' + utils.get(subcategory['@'], 'text')); | ||
if (el['@'] && utils.get(el['@'], 'text')) _category = utils.get(el['@'], 'text'); | ||
if (el[name]) { | ||
if (Array.isArray(el[name])) { | ||
el[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + utils.get(subcategory['@'], 'text')); | ||
}); | ||
} else { | ||
if (category[name]['@'] && utils.get(category[name]['@'], 'text')) | ||
item.categories.push(cat + '/' + utils.get(category[name]['@'], 'text')); | ||
if (el[name]['@'] && utils.get(el[name]['@'], 'text')) | ||
item.categories.push(_category + '/' + utils.get(el[name]['@'], 'text')); | ||
} | ||
} else { | ||
item.categories.push(cat); | ||
item.categories.push(_category); | ||
} | ||
} else if ('media:category' == name) { | ||
item.categories.push(utils.get(category)); | ||
item.categories.push(utils.get(el)); | ||
} | ||
}); | ||
} else { | ||
if ('category' == name && 'atom' == type) { | ||
if (utils.get(el['@'], 'term')) item.categories.push(utils.get(el['@'], 'term')); | ||
} else if ('category' == name && utils.get(el) && 'rss' == type) { | ||
var categories = utils.get(el).split(',').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) item.categories = item.categories.concat(categories); | ||
} else if ('dc:subject' == name && utils.get(el)) { | ||
var categories = utils.get(el).split(' ').map(function (cat){ return cat.trim(); }); | ||
if (categories.length) item.categories = item.categories.concat(categories); | ||
} else if ('itunes:category' == name) { | ||
var cat; | ||
if (el['@'] && utils.get(el['@'], 'text')) cat = utils.get(el['@'], 'text'); | ||
if (el[name]) { | ||
if (Array.isArray(el[name])) { | ||
el[name].forEach(function (subcategory){ | ||
if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(cat + '/' + utils.get(subcategory['@'], 'text')); | ||
}); | ||
} else { | ||
if (el[name]['@'] && utils.get(el[name]['@'], 'text')) | ||
item.categories.push(cat + '/' + utils.get(el[name]['@'], 'text')); | ||
} | ||
} else { | ||
item.categories.push(cat); | ||
} | ||
} else if ('media:category' == name) { | ||
item.categories.push(utils.get(el)); | ||
} | ||
} | ||
break; | ||
case('feedburner:origlink'): | ||
case('pheedo:origlink'): | ||
if (!item.origlink) { | ||
item.origlink = utils.get(el); | ||
} | ||
break; | ||
} // switch end | ||
break; | ||
case('feedburner:origlink'): | ||
case('pheedo:origlink'): | ||
if (!item.origlink) { | ||
item.origlink = utils.get(el); | ||
} | ||
break; | ||
} // switch end | ||
} | ||
// Fill with all native other namespaced properties | ||
@@ -441,19 +459,22 @@ if (name.indexOf('#') !== 0) { | ||
}); // forEach end | ||
if (!item.description) { | ||
if (node['itunes:summary']) item.description = utils.get(node['itunes:summary']); | ||
if (normalize) { | ||
if (!item.description) { | ||
if (node['itunes:summary']) item.description = utils.get(node['itunes:summary']); | ||
} | ||
if (!item.author) { | ||
if (node['itunes:author']) item.author = utils.get(node['itunes:author']); | ||
else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = utils.get(node['itunes:owner']['itunes:name']); | ||
else if (node['dc:publisher']) item.author = utils.get(node['dc:publisher']); | ||
} | ||
if (!item.image.url) { | ||
if (node['itunes:image']) item.image.url = utils.get(node['itunes:image']['@'], 'href'); | ||
else if (node['media:thumbnail']) item.image.url = utils.get(node['media:thumbnail']['@'], 'url'); | ||
else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:content']['media:thumbnail']['@'], 'url'); | ||
else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:thumbnail']['@'], 'url'); | ||
else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:content']['media:thumbnail']['@'], 'url'); | ||
} | ||
if (item.categories.length) | ||
item.categories = utils.unique(item.categories); | ||
} | ||
if (!item.author) { | ||
if (node['itunes:author']) item.author = utils.get(node['itunes:author']); | ||
else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = utils.get(node['itunes:owner']['itunes:name']); | ||
else if (node['dc:publisher']) item.author = utils.get(node['dc:publisher']); | ||
} | ||
if (!item.image.url) { | ||
if (node['itunes:image']) item.image.url = utils.get(node['itunes:image']['@'], 'href'); | ||
else if (node['media:thumbnail']) item.image.url = utils.get(node['media:thumbnail']['@'], 'url'); | ||
else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:content']['media:thumbnail']['@'], 'url'); | ||
else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:thumbnail']['@'], 'url'); | ||
else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:content']['media:thumbnail']['@'], 'url'); | ||
} | ||
if (item.categories.length) | ||
item.categories = utils.unique(item.categories); | ||
return item; | ||
@@ -467,12 +488,16 @@ } | ||
*/ | ||
function FeedParser () { | ||
function FeedParser (options) { | ||
var parser = this; | ||
parser.options = options || {}; | ||
if (!('normalize' in parser.options)) parser.options.normalize = true; | ||
if (!('addMetaToItems' in parser.options)) parser.options.addMetaToItems = true; | ||
parser._reset(); | ||
parser.stream = sax.createStream(false /* strict mode - no */, {lowercase: true}); // https://github.com/isaacs/sax-js | ||
parser.stream.on('error', function (e){ parser.handleSaxError(e, parser) }); | ||
parser.stream.on('opentag', function (n){ parser.handleOpenTag(n, parser) }); | ||
parser.stream.on('closetag', function (el){ parser.handleCloseTag(el, parser) }); | ||
parser.stream.on('text', function (text){ parser.handleText(text, parser) }); | ||
parser.stream.on('cdata', function (text){ parser.handleText(text, parser) }); | ||
parser.stream.on('end', function (){ parser.handleEnd(parser) }); | ||
parser.stream.on('error', function (e){ parser.handleSaxError(e, parser); }); | ||
parser.stream.on('opentag', function (n){ parser.handleOpenTag(n, parser); }); | ||
parser.stream.on('closetag', function (el){ parser.handleCloseTag(el, parser); }); | ||
parser.stream.on('text', function (text){ parser.handleText(text, parser); }); | ||
parser.stream.on('cdata', function (text){ parser.handleText(text, parser); }); | ||
parser.stream.on('end', function (){ parser.handleEnd(parser); }); | ||
EventEmitter.call(parser); | ||
@@ -589,3 +614,3 @@ } | ||
* | ||
* @param {String} fully qualified uri or a parsed url object from url.parse() | ||
* @param {Readable Stream} | ||
* @param {Function} callback | ||
@@ -668,3 +693,3 @@ * @api public | ||
return attrs; | ||
}; | ||
} | ||
@@ -682,3 +707,3 @@ if (Object.keys(node.attributes).length) { | ||
parser.xhtml['#'] += '>'; | ||
} else if (parser.stack.length == 0 && | ||
} else if (parser.stack.length === 0 && | ||
(n['#name'] == 'rss' || n['#name'] == 'rdf:rdf' || n['#name'] == 'feed')) { | ||
@@ -690,3 +715,3 @@ parser.meta['#ns'] = []; | ||
o[name] = n['@'][name]; | ||
if (name.indexOf('xmlns') == 0) { | ||
if (name.indexOf('xmlns') === 0) { | ||
parser.meta['#ns'].push(o); | ||
@@ -762,7 +787,9 @@ } else if (name != 'version') { | ||
if (!parser.meta.title) { // We haven't yet parsed all the metadata | ||
utils.merge(parser.meta, handleMeta(parser.stack[0], parser.meta['#type'])); | ||
utils.merge(parser.meta, handleMeta(parser.stack[0], parser.meta['#type'], parser.options)); | ||
parser.emit('meta', parser.meta); | ||
} | ||
item = handleItem(n, parser.meta['#type']); | ||
item.meta = parser.meta; | ||
item = handleItem(n, parser.meta['#type'], parser.options); | ||
if (parser.options.addMetaToItems) { | ||
item.meta = parser.meta; | ||
} | ||
if (parser.meta.author && !item.author) item.author = parser.meta.author; | ||
@@ -772,3 +799,3 @@ parser.emit('article', item); | ||
} else if ((el == 'channel' || el == 'feed') && !parser.meta.title) { // We haven't yet parsed all the metadata | ||
utils.merge(parser.meta, handleMeta(n, parser.meta['#type'])); | ||
utils.merge(parser.meta, handleMeta(n, parser.meta['#type'], parser.options)); | ||
parser.emit('meta', parser.meta); | ||
@@ -817,8 +844,8 @@ } | ||
this.callback = undefined; | ||
} | ||
}; | ||
FeedParser.prototype._setCallback = function (callback){ | ||
this.callback = ('function' == typeof callback) ? callback : undefined; | ||
} | ||
}; | ||
exports = module.exports = FeedParser; |
{ "name" : "feedparser" | ||
, "author" : "Dan MacTough <danmactough@gmail.com>" | ||
, "description" : "Robust RSS Atom and RDF feed parsing using sax js" | ||
, "version": "0.9.12" | ||
, "version": "0.9.13" | ||
, "keywords" : ["rss", "feed", "atom", "rdf", "xml", "syndication"] | ||
@@ -6,0 +6,0 @@ , "homepage" : "http://github.com/danmactough/node-feedparser" |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
43246
890
6