feedparser
Advanced tools
Comparing version 0.9.15 to 0.10.0-beta
135
main.js
@@ -53,4 +53,16 @@ /********************************************************************** | ||
function handleAttributes (attrs, el) { | ||
/* | ||
* Using the sax.js option { xmlns: true } | ||
* attrs is an array of objects (not strings) having the following properties | ||
* name - e.g., xmlns:dc or href | ||
* value | ||
* prefix - the first part of the name of the attribute (before the colon) | ||
* local - the second part of the name of the attribute (after the colon) | ||
* uri - the uri of the namespace | ||
* | ||
*/ | ||
var parser = this | ||
, basepath = '' | ||
, simplifiedAttributes = {} | ||
; | ||
@@ -62,19 +74,33 @@ | ||
Object.keys(attrs).forEach(function(name){ | ||
if (basepath && (name == 'href' || name == 'src' || name == 'uri')) { | ||
Object.keys(attrs).forEach(function(key){ | ||
var attr = attrs[key] | ||
, ns = {} | ||
, prefix = '' | ||
; | ||
if (attr.prefix === 'xmlns') { | ||
ns[attr.name] = attr.value; | ||
parser.meta['#ns'].push(ns); | ||
} | ||
// If the feed is using a non-default prefix, we'll use it, too | ||
// But we force the use of the 'xml' prefix | ||
if (attr.uri && attr.prefix && !utils.nslookup(attr.uri, attr.prefix) || utils.nslookup(attr.uri, 'xml')) { | ||
prefix = ( utils.nsprefix(attr.uri) || attr.prefix ) + ( attr.local ? ':' : '' ); | ||
} | ||
if (basepath && (attr.local == 'href' || attr.local == 'src' || attr.local == 'uri')) { | ||
// Apply xml:base to these elements as they appear | ||
// rather than leaving it to the ultimate parser | ||
attrs[name] = utils.resolve(basepath, attrs[name]); | ||
} else if (name == 'xml:base') { | ||
attr.value = utils.resolve(basepath, attr.value); | ||
} else if (attr.local === 'base' && utils.nslookup(attr.uri, 'xml')) { | ||
// Keep track of the xml:base for the current node | ||
if (basepath) { | ||
attrs[name] = utils.resolve(basepath, attrs[name]); | ||
attr.value = utils.resolve(basepath, attr.value); | ||
} | ||
parser.xmlbase.unshift({ '#name': el, '#': attrs[name]}); | ||
} else if (name == 'type' && attrs['type'] == 'xhtml') { | ||
parser.xmlbase.unshift({ '#name': el, '#': attr.value}); | ||
} else if (attr.name === 'type' && attr.value === 'xhtml') { | ||
parser.in_xhtml = true; | ||
parser.xhtml = {'#name': el, '#': ''}; | ||
} | ||
attrs[name] = attrs[name].trim(); | ||
simplifiedAttributes[prefix + attr.local] = attr.value.trim(); | ||
}); | ||
return attrs; | ||
return simplifiedAttributes; | ||
} | ||
@@ -100,2 +126,3 @@ | ||
var el = node[name]; | ||
if (normalize) { | ||
@@ -568,3 +595,3 @@ switch(name){ | ||
if (parser.options.feedurl) parser.xmlbase.unshift({ '#name': 'xml', '#': parser.options.feedurl}); | ||
parser.stream = sax.createStream(false /* strict mode - no */, {lowercase: true}); // https://github.com/isaacs/sax-js | ||
parser.stream = sax.createStream(false /* strict mode - no */, {lowercase: true, xmlns: true }); // https://github.com/isaacs/sax-js | ||
parser.stream.on('error', function (e){ parser.handleSaxError(e, parser); }); | ||
@@ -696,3 +723,3 @@ parser.stream.on('opentag', function (n){ parser.handleOpenTag(n, parser); }); | ||
} | ||
if (!parser.xmlbase.length) { // .parseFile may have already populated this value | ||
if (!parser.xmlbase.length) { // #parseFile may have already populated this value | ||
if (/^https?:/.test(url)) { | ||
@@ -791,2 +818,5 @@ parser.xmlbase.unshift({ '#name': 'xml', '#': url}); | ||
n['#name'] = node.name; // Avoid namespace collisions later... | ||
n['#prefix'] = node.prefix; // The current ns prefix | ||
n['#local'] = node.local; // The current element name, sans prefix | ||
n['#uri'] = node.uri; // The current ns uri | ||
n['@'] = {}; | ||
@@ -806,16 +836,14 @@ n['#'] = ''; | ||
parser.xhtml['#'] += '>'; | ||
} else if (parser.stack.length === 0 && | ||
(n['#name'] == 'rss' || n['#name'] == 'rdf:rdf' || n['#name'] == 'feed')) { | ||
parser.meta['#ns'] = []; | ||
parser.meta['@'] = []; | ||
Object.keys(n['@']).forEach(function(name) { | ||
} else if ( parser.stack.length === 0 && | ||
(n['#name'] === 'rss' || | ||
(n['#local'] === 'rdf' && utils.nslookup([n['#uri']], 'rdf')) || | ||
(n['#local'] === 'feed'&& utils.nslookup([n['#uri']], 'atom')) ) ) { | ||
Object.keys(n['@']).forEach(function(name) { | ||
var o = {}; | ||
o[name] = n['@'][name]; | ||
if (name.indexOf('xmlns') === 0) { | ||
parser.meta['#ns'].push(o); | ||
} else if (name != 'version') { | ||
if (name != 'version') { | ||
o[name] = n['@'][name]; | ||
parser.meta['@'].push(o); | ||
} | ||
}); | ||
switch(n['#name']) { | ||
switch(n['#local']) { | ||
case 'rss': | ||
@@ -825,3 +853,3 @@ parser.meta['#type'] = 'rss'; | ||
break; | ||
case 'rdf:rdf': | ||
case 'rdf': | ||
parser.meta['#type'] = 'rdf'; | ||
@@ -841,2 +869,6 @@ parser.meta['#version'] = n['@']['version'] || '1.0'; | ||
var parser = scope | ||
, node = { '#name' : el | ||
, '#prefix' : '' | ||
, '#local' : '' } | ||
, stdEl | ||
, item | ||
@@ -846,3 +878,25 @@ , baseurl | ||
var n = parser.stack.shift(); | ||
el = el.split(':'); | ||
if (el.length > 1 && el[0] === n['#prefix']) { | ||
if (utils.nslookup(n['#uri'], 'atom')) { | ||
node['#prefix'] = el[0]; | ||
node['#local'] = el.slice(1).join(':'); | ||
node['#type'] = 'atom'; | ||
} else if (utils.nslookup(n['#uri'], 'rdf')) { | ||
node['#prefix'] = el[0]; | ||
node['#local'] = el.slice(1).join(':'); | ||
node['#type'] = 'rdf'; | ||
} else { | ||
node['#prefix'] = utils.nsprefix(n['#uri']) || n['#prefix']; | ||
node['#local'] = el.slice(1).join(':'); | ||
} | ||
} else { | ||
node['#local'] = node['#name']; | ||
node['#type'] = utils.nsprefix(n['#uri']) || n['#prefix']; | ||
} | ||
delete n['#name']; | ||
delete n['#local']; | ||
delete n['#prefix']; | ||
delete n['#uri']; | ||
@@ -853,3 +907,3 @@ if (parser.xmlbase && parser.xmlbase.length) { | ||
if (baseurl && (el == 'logo' || el == 'icon')) { // Via atom | ||
if (baseurl && (node['#local'] === 'logo' || node['#local'] === 'icon') && node['#type'] === 'atom') { | ||
// Apply xml:base to these elements as they appear | ||
@@ -865,3 +919,3 @@ // rather than leaving it to the ultimate parser | ||
if (parser.in_xhtml) { | ||
if (el == parser.xhtml['#name']) { // The end of the XHTML | ||
if (node['#name'] == parser.xhtml['#name']) { // The end of the XHTML | ||
@@ -879,3 +933,3 @@ // Add xhtml data to the container element | ||
} else { // Somewhere in the middle of the XHTML | ||
parser.xhtml['#'] += '</' + el + '>'; | ||
parser.xhtml['#'] += '</' + node['#name'] + '>'; | ||
} | ||
@@ -886,2 +940,3 @@ } | ||
if (n['#'].match(/^\s*$/)) { | ||
// Delete text nodes with nothing but whitespace | ||
delete n['#']; | ||
@@ -891,2 +946,3 @@ } else { | ||
if (Object.keys(n).length === 1) { | ||
// If there is only one text node, hoist it | ||
n = n['#']; | ||
@@ -897,3 +953,7 @@ } | ||
if (el == 'item' || el == 'entry') { // We have an article! | ||
if (node['#name'] === 'item' || | ||
node['#name'] === 'entry' || | ||
(node['#local'] === 'item' && (node['#prefix'] === '' || node['#type'] === 'rdf')) || | ||
(node['#local'] == 'entry' && (node['#prefix'] === '' || node['#type'] === 'atom'))) { // We have an article! | ||
if (!parser.meta.title) { // We haven't yet parsed all the metadata | ||
@@ -913,3 +973,7 @@ utils.merge(parser.meta, handleMeta.call(parser, parser.stack[0], parser.meta['#type'], parser.options)); | ||
parser.articles.push(item); | ||
} else if ((el == 'channel' || el == 'feed') && !parser.meta.title) { // We haven't yet parsed all the metadata | ||
} else if (!parser.meta.title && // We haven't yet parsed all the metadata | ||
(node['#name'] === 'channel' || | ||
node['#name'] === 'feed' || | ||
(node['#local'] === 'channel' && (node['#prefix'] === '' || node['#type'] === 'rdf')) || | ||
(node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom')) ) ) { | ||
utils.merge(parser.meta, handleMeta(n, parser.meta['#type'], parser.options)); | ||
@@ -920,9 +984,14 @@ parser.emit('meta', parser.meta); | ||
if (parser.stack.length > 0) { | ||
if (!parser.stack[0].hasOwnProperty(el)) { | ||
parser.stack[0][el] = n; | ||
} else if (parser.stack[0][el] instanceof Array) { | ||
parser.stack[0][el].push(n); | ||
if (node['#prefix'] && node['#local'] && !node['#type']) { | ||
stdEl = node['#prefix'] + ':' + node['#local']; | ||
} else { | ||
parser.stack[0][el] = [parser.stack[0][el], n]; | ||
stdEl = node['#local'] || node['#name']; | ||
} | ||
if (!parser.stack[0].hasOwnProperty(stdEl)) { | ||
parser.stack[0][stdEl] = n; | ||
} else if (parser.stack[0][stdEl] instanceof Array) { | ||
parser.stack[0][stdEl].push(n); | ||
} else { | ||
parser.stack[0][stdEl] = [parser.stack[0][stdEl], n]; | ||
} | ||
} else { | ||
@@ -950,2 +1019,4 @@ parser.nodes = n; | ||
this.meta = {}; | ||
this.meta['#ns'] = []; | ||
this.meta['@'] = []; | ||
this.articles = []; | ||
@@ -952,0 +1023,0 @@ this.stack = []; |
{ "name" : "feedparser" | ||
, "author" : "Dan MacTough <danmactough@gmail.com>" | ||
, "description" : "Robust RSS Atom and RDF feed parsing using sax js" | ||
, "version": "0.9.15" | ||
, "version": "0.10.0-beta" | ||
, "keywords" : ["rss", "feed", "atom", "rdf", "xml", "syndication"] | ||
@@ -6,0 +6,0 @@ , "homepage" : "http://github.com/danmactough/node-feedparser" |
26
utils.js
@@ -6,2 +6,3 @@ | ||
var URL = require('url') | ||
, NS = require('./namespaces') | ||
; | ||
@@ -98,2 +99,25 @@ | ||
} | ||
exports.resolve = resolve; | ||
exports.resolve = resolve; | ||
/* | ||
* Check whether a given namespace URI matches the given default | ||
* | ||
* @param {String} URI | ||
* @param {String} default, e.g., 'atom' | ||
* @return {Boolean} | ||
*/ | ||
function nslookup (uri, def) { | ||
return NS[uri] === def; | ||
} | ||
exports.nslookup = nslookup; | ||
/* | ||
* Return the "default" namespace prefix for a given namespace URI | ||
* | ||
* @param {String} URI | ||
* @return {String} | ||
*/ | ||
function nsprefix (uri) { | ||
return NS[uri]; | ||
} | ||
exports.nsprefix = nsprefix; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Non-existent author
Supply chain risk: The package was published by an npm account that no longer exists.
Found 1 instance in 1 package
442847
32
1482
0