feedparser
Advanced tools
Comparing version 0.3.0 to 0.3.1
@@ -40,13 +40,2 @@ /********************************************************************** | ||
var meta = {} | ||
, articles = [] | ||
, stack = [] | ||
, nodes = {} | ||
, xmlbase = [] | ||
, in_xhtml = false | ||
, xhtml = {}; | ||
/* Where to store xhtml elements as associative | ||
array with keys: '#' (containing the text) | ||
and '#name' (containing the XML element name) */ | ||
/** | ||
@@ -59,8 +48,17 @@ * FeedParser constructor. Most apps will only use one instance. | ||
var self = this; | ||
this.saxStream = require('sax').createStream(false, {lowercasetags: true}); // https://github.com/isaacs/sax-js | ||
this.saxStream.on('error', function (e) { self.handleError(e, self) }); | ||
this.saxStream.on('opentag', function (n) { self.handleOpenTag(n, self) }); | ||
this.saxStream.on('closetag', function (el) { self.handleCloseTag(el, self) }); | ||
this.saxStream.on('text', function (text) { self.handleText(text, self) }); | ||
this.saxStream.on('cdata', function (text) { self.handleText(text, self) }); | ||
self.meta = {}; | ||
self.articles = []; | ||
self.stack = []; | ||
self.nodes = {}; | ||
self.xmlbase = []; | ||
self.in_xhtml = false; | ||
self.xhtml = {}; /* Where to store xhtml elements as associative | ||
array with keys: '#' (containing the text) | ||
and '#name' (containing the XML element name) */ | ||
self.saxStream = require('sax').createStream(false, {lowercasetags: true}); // https://github.com/isaacs/sax-js | ||
self.saxStream.on('error', function (e) { self.handleError(e, self) }); | ||
self.saxStream.on('opentag', function (n) { self.handleOpenTag(n, self) }); | ||
self.saxStream.on('closetag', function (el) { self.handleCloseTag(el, self) }); | ||
self.saxStream.on('text', function (text) { self.handleText(text, self) }); | ||
self.saxStream.on('cdata', function (text) { self.handleText(text, self) }); | ||
events.EventEmitter.call(this); | ||
@@ -105,8 +103,8 @@ } | ||
this.saxStream.on('end', function () { | ||
if (articles.length) { | ||
self.emit('end', articles); | ||
self.emit('done', articles); // deprecated | ||
if (self.articles.length) { | ||
self.emit('end', self.articles); | ||
self.emit('done', self.articles); // deprecated | ||
} | ||
if ('function' == typeof callback) { | ||
callback(meta, articles); | ||
callback(self.meta, self.articles); | ||
} | ||
@@ -129,8 +127,8 @@ }); | ||
self.saxStream.on('end', function () { | ||
if (articles.length) { | ||
self.emit('end', articles); | ||
self.emit('done', articles); // deprecated | ||
if (self.articles.length) { | ||
self.emit('end', self.articles); | ||
self.emit('done', self.articles); // deprecated | ||
} | ||
if ('function' == typeof callback) { | ||
callback(meta, articles); | ||
callback(self.meta, self.articles); | ||
} | ||
@@ -161,8 +159,8 @@ }); | ||
self.saxStream.on('end', function () { | ||
if (articles.length) { | ||
self.emit('end', articles); | ||
self.emit('done', articles); // deprecated | ||
if (self.articles.length) { | ||
self.emit('end', self.articles); | ||
self.emit('done', self.articles); // deprecated | ||
} | ||
if ('function' == typeof callback) { | ||
callback(meta, articles); | ||
callback(self.meta, self.articles); | ||
} | ||
@@ -191,8 +189,8 @@ }); | ||
self.saxStream.on('end', function () { | ||
if (articles.length) { | ||
self.emit('end', articles); | ||
self.emit('done', articles); // deprecated | ||
if (self.articles.length) { | ||
self.emit('end', self.articles); | ||
self.emit('done', self.articles); // deprecated | ||
} | ||
if ('function' == typeof callback) { | ||
callback(meta, articles); | ||
callback(self.meta, self.articles); | ||
} | ||
@@ -211,2 +209,3 @@ }); | ||
FeedParser.prototype.handleOpenTag = function (node, scope){ | ||
var self = scope; | ||
var n = {}; | ||
@@ -219,14 +218,14 @@ n['#name'] = node.name; // Avoid namespace collissions later... | ||
Object.keys(attrs).forEach(function(name){ | ||
if (xmlbase.length && (name == 'href' || name == 'src')) { | ||
if (self.xmlbase.length && (name == 'href' || name == 'src')) { | ||
// Apply xml:base to these elements as they appear | ||
// rather than leaving it to the ultimate parser | ||
attrs[name] = url.resolve(xmlbase[0]['#'], attrs[name]); | ||
attrs[name] = url.resolve(self.xmlbase[0]['#'], attrs[name]); | ||
} else if (name == 'xml:base') { | ||
if (xmlbase.length) { | ||
attrs[name] = url.resolve(xmlbase[0]['#'], attrs[name]); | ||
if (self.xmlbase.length) { | ||
attrs[name] = url.resolve(self.xmlbase[0]['#'], attrs[name]); | ||
} | ||
xmlbase.unshift({ '#name': el, '#': attrs[name]}); | ||
self.xmlbase.unshift({ '#name': el, '#': attrs[name]}); | ||
} else if (name == 'type' && attrs['type'] == 'xhtml') { | ||
in_xhtml = true; | ||
xhtml = {'#name': el, '#': ''}; | ||
self.in_xhtml = true; | ||
self.xhtml = {'#name': el, '#': ''}; | ||
} | ||
@@ -242,12 +241,12 @@ attrs[name] = attrs[name].trim(); | ||
if (in_xhtml) { // We are in an xhtml node | ||
if (self.in_xhtml) { // We are in an xhtml node | ||
// This builds the opening tag, e.g., <div id='foo' class='bar'> | ||
xhtml['#'] += '<'+n['#name']; | ||
self.xhtml['#'] += '<'+n['#name']; | ||
Object.keys(n['@']).forEach(function(name){ | ||
xhtml['#'] += ' '+ name +'="'+ n['@'][name] + '"'; | ||
self.xhtml['#'] += ' '+ name +'="'+ n['@'][name] + '"'; | ||
}); | ||
xhtml['#'] += '>'; | ||
} else if (stack.length == 0 && | ||
self.xhtml['#'] += '>'; | ||
} else if (self.stack.length == 0 && | ||
(n['#name'] == 'rss' || n['#name'] == 'rdf:rdf' || n['#name'] == 'feed')) { | ||
meta['#ns'] = []; | ||
self.meta['#ns'] = []; | ||
Object.keys(n['@']).forEach(function(name) { | ||
@@ -257,3 +256,3 @@ if (name.indexOf('xmlns') == 0) { | ||
o[name] = n['@'][name]; | ||
meta['#ns'].push(o); | ||
self.meta['#ns'].push(o); | ||
} | ||
@@ -263,16 +262,16 @@ }); | ||
case 'rss': | ||
meta['#type'] = 'rss'; | ||
meta['#version'] = n['@']['version']; | ||
self.meta['#type'] = 'rss'; | ||
self.meta['#version'] = n['@']['version']; | ||
break; | ||
case 'rdf:rdf': | ||
meta['#type'] = 'rdf'; | ||
meta['#version'] = n['@']['version'] || '1.0'; | ||
self.meta['#type'] = 'rdf'; | ||
self.meta['#version'] = n['@']['version'] || '1.0'; | ||
break; | ||
case 'feed': | ||
meta['#type'] = 'atom'; | ||
meta['#version'] = n['@']['version'] || '1.0'; | ||
self.meta['#type'] = 'atom'; | ||
self.meta['#version'] = n['@']['version'] || '1.0'; | ||
break; | ||
} | ||
} | ||
stack.unshift(n); | ||
self.stack.unshift(n); | ||
}; | ||
@@ -282,13 +281,13 @@ | ||
var self = scope; | ||
var n = stack.shift(); | ||
var n = self.stack.shift(); | ||
delete n['#name']; | ||
if (xmlbase.length && (el == xmlbase[0]['#name'])) { | ||
void xmlbase.shift(); | ||
if (self.xmlbase.length && (el == self.xmlbase[0]['#name'])) { | ||
void self.xmlbase.shift(); | ||
} | ||
if (in_xhtml) { | ||
if (el == xhtml['#name']) { // The end of the XHTML | ||
if (self.in_xhtml) { | ||
if (el == self.xhtml['#name']) { // The end of the XHTML | ||
// Add xhtml data to the container element | ||
n['#'] += xhtml['#'].trim(); | ||
n['#'] += self.xhtml['#'].trim(); | ||
// Clear xhtml nodes from the tree | ||
@@ -300,6 +299,6 @@ for (var key in n) { | ||
} | ||
xhtml = {}; | ||
in_xhtml = false; | ||
self.xhtml = {}; | ||
self.in_xhtml = false; | ||
} else { // Somewhere in the middle of the XHTML | ||
xhtml['#'] += '</' + el + '>'; | ||
self.xhtml['#'] += '</' + el + '>'; | ||
} | ||
@@ -320,20 +319,20 @@ } | ||
if (el == 'item' || el == 'entry') { // We have an article! | ||
if (!meta.title) { // We haven't yet parsed all the metadata | ||
if (!self.meta.title) { // We haven't yet parsed all the metadata | ||
// Set all the meta keys to null | ||
meta.title = meta.description = meta.pubDate = meta.link = meta.xmlUrl | ||
self.meta.title = self.meta.description = self.meta.pubDate = self.meta.link = self.meta.xmlUrl | ||
= null; | ||
switch(meta['#type']){ | ||
switch(self.meta['#type']){ | ||
case 'atom': | ||
meta.title = getValue(stack[0].title) | ||
meta.description = getValue(stack[0].subtitle); | ||
meta.pubDate = getValue(stack[0].updated) ? new Date(stack[0].updated['#']) : null; | ||
if ( stack[0].link && stack[0].link.length ) { | ||
stack[0].link.forEach(function(link){ | ||
self.meta.title = getValue(self.stack[0].title) | ||
self.meta.description = getValue(self.stack[0].subtitle); | ||
self.meta.pubDate = getValue(self.stack[0].updated) ? new Date(self.stack[0].updated['#']) : null; | ||
if ( self.stack[0].link && self.stack[0].link.length ) { | ||
self.stack[0].link.forEach(function(link){ | ||
if (link['@'] && link['@']['rel'] && link['@']['href']) | ||
switch(link['@']['rel']){ | ||
case('alternate'): | ||
meta.link = link['@']['href']; | ||
self.meta.link = link['@']['href']; | ||
break; | ||
case('self'): | ||
meta.xmlUrl = link['@']['href']; | ||
self.meta.xmlUrl = link['@']['href']; | ||
break; | ||
@@ -345,29 +344,29 @@ }; | ||
case 'rss': | ||
if (stack[0].title) { | ||
Object.keys(stack[0]).forEach(function(el){ | ||
if (self.stack[0].title) { | ||
Object.keys(self.stack[0]).forEach(function(el){ | ||
switch(el){ | ||
case('title'): | ||
meta.title = getValue(stack[0][el]); | ||
self.meta.title = getValue(self.stack[0][el]); | ||
break; | ||
case('description'): | ||
meta.description = getValue(stack[0][el]); | ||
self.meta.description = getValue(self.stack[0][el]); | ||
break; | ||
case('pubdate'): | ||
case('lastbuilddate'): | ||
if (meta.pubDate === null || el == 'pubdate') | ||
meta.pubDate = getValue(stack[0][el]) ? new Date(stack[0][el]['#']) : null; | ||
if (self.meta.pubDate === null || el == 'pubdate') | ||
self.meta.pubDate = getValue(self.stack[0][el]) ? new Date(self.stack[0][el]['#']) : null; | ||
break; | ||
case('link'): | ||
meta.link = getValue(stack[0][el]); | ||
self.meta.link = getValue(self.stack[0][el]); | ||
break; | ||
case('atom:link'): | ||
if (Array.isArray(stack[0][el])) { | ||
stack[0][el].forEach(function(link){ | ||
if (Array.isArray(self.stack[0][el])) { | ||
self.stack[0][el].forEach(function(link){ | ||
if(link['@'] && getValue(link['@'], 'rel') == 'self') | ||
meta.xmlUrl = getValue(link['@'], 'href'); | ||
self.meta.xmlUrl = getValue(link['@'], 'href'); | ||
}); | ||
} else if (stack[0][el].constructor.name == 'Object' | ||
&& stack[0][el]['@'] | ||
&& getValue(stack[0][el]['@'], 'rel') == 'self') { | ||
meta.xmlUrl = getValue(stack[0][el]['@'], 'href'); | ||
} else if (self.stack[0][el].constructor.name == 'Object' | ||
&& self.stack[0][el]['@'] | ||
&& getValue(self.stack[0][el]['@'], 'rel') == 'self') { | ||
self.meta.xmlUrl = getValue(self.stack[0][el]['@'], 'href'); | ||
} | ||
@@ -380,27 +379,27 @@ break; | ||
case 'rdf': | ||
if (stack[0].channel) { | ||
Object.keys(stack[0].channel).forEach(function(el){ | ||
if (self.stack[0].channel) { | ||
Object.keys(self.stack[0].channel).forEach(function(el){ | ||
switch(el){ | ||
case('title'): | ||
meta.title = getValue(stack[0].channel[el]); | ||
self.meta.title = getValue(self.stack[0].channel[el]); | ||
break; | ||
case('description'): | ||
meta.description = getValue(stack[0].channel[el]); | ||
self.meta.description = getValue(self.stack[0].channel[el]); | ||
break; | ||
case('dc:date'): | ||
meta.pubDate = getValue(stack[0].channel[el]) ? new Date(stack[0].channel[el]['#']) : null; | ||
self.meta.pubDate = getValue(self.stack[0].channel[el]) ? new Date(self.stack[0].channel[el]['#']) : null; | ||
break; | ||
case('link'): | ||
meta.link = getValue(stack[0].channel[el]); | ||
self.meta.link = getValue(self.stack[0].channel[el]); | ||
break; | ||
case('atom:link'): | ||
if (Array.isArray(stack[0].channel[el])) { | ||
stack[0].channel[el].forEach(function(link){ | ||
if (Array.isArray(self.stack[0].channel[el])) { | ||
self.stack[0].channel[el].forEach(function(link){ | ||
if(link['@'] && getValue(link['@'], 'rel') == 'self') | ||
meta.xmlUrl = getValue(link['@'], 'href'); | ||
self.meta.xmlUrl = getValue(link['@'], 'href'); | ||
}); | ||
} else if (stack[0].channel[el].constructor.name == 'Object' | ||
&& stack[0].channel[el]['@'] | ||
&& getValue(stack[0].channel[el]['@'], 'rel') == 'self') { | ||
meta.xmlUrl = getValue(stack[0].channel[el]['@'], 'href'); | ||
} else if (self.stack[0].channel[el].constructor.name == 'Object' | ||
&& self.stack[0].channel[el]['@'] | ||
&& getValue(self.stack[0].channel[el]['@'], 'rel') == 'self') { | ||
self.meta.xmlUrl = getValue(self.stack[0].channel[el]['@'], 'href'); | ||
} | ||
@@ -413,5 +412,5 @@ break; | ||
} | ||
self.emit('meta', meta); | ||
self.emit('meta', self.meta); | ||
} | ||
switch(meta['#type']){ | ||
switch(self.meta['#type']){ | ||
case 'atom': | ||
@@ -441,5 +440,5 @@ var item = {}; | ||
item.guid = getValue(n.id); | ||
item.meta = meta; | ||
item.meta = self.meta; | ||
self.emit('article', item); | ||
articles.push(item); | ||
self.articles.push(item); | ||
break; | ||
@@ -457,5 +456,5 @@ case 'rss': | ||
item.guid = getValue(n.guid); | ||
item.meta = meta; | ||
item.meta = self.meta; | ||
self.emit('article', item); | ||
articles.push(item); | ||
self.articles.push(item); | ||
break; | ||
@@ -474,5 +473,5 @@ case 'rdf': | ||
item.guid = getValue(n['@'], 'rdf:about'); | ||
item.meta = meta; | ||
item.meta = self.meta; | ||
self.emit('article', item); | ||
articles.push(item); | ||
self.articles.push(item); | ||
break; | ||
@@ -482,12 +481,12 @@ } | ||
if (stack.length > 0) { | ||
if (!stack[0].hasOwnProperty(el)) { | ||
stack[0][el] = n; | ||
} else if (stack[0][el] instanceof Array) { | ||
stack[0][el].push(n); | ||
if (self.stack.length > 0) { | ||
if (!self.stack[0].hasOwnProperty(el)) { | ||
self.stack[0][el] = n; | ||
} else if (self.stack[0][el] instanceof Array) { | ||
self.stack[0][el].push(n); | ||
} else { | ||
stack[0][el] = [stack[0][el], n]; | ||
self.stack[0][el] = [self.stack[0][el], n]; | ||
} | ||
} else { | ||
nodes = n; | ||
self.nodes = n; | ||
} | ||
@@ -498,10 +497,10 @@ }; | ||
var self = scope; | ||
if (in_xhtml) { | ||
xhtml['#'] += text; | ||
if (self.in_xhtml) { | ||
self.xhtml['#'] += text; | ||
} else { | ||
if (stack.length) { | ||
if ('#' in stack[0]) { | ||
stack[0]['#'] += text; | ||
if (self.stack.length) { | ||
if ('#' in self.stack[0]) { | ||
self.stack[0]['#'] += text; | ||
} else { | ||
stack[0]['#'] = text; | ||
self.stack[0]['#'] = text; | ||
} | ||
@@ -508,0 +507,0 @@ } |
@@ -5,3 +5,3 @@ { | ||
"description": "Robust RSS, Atom, and RDF feed parsing using sax js", | ||
"version": "0.3.0", | ||
"version": "0.3.1", | ||
"repository": { | ||
@@ -8,0 +8,0 @@ "type": "git", |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
20299