feedparser
Advanced tools
Comparing version 0.9.13 to 0.9.14
178
main.js
@@ -15,3 +15,2 @@ /********************************************************************** | ||
, fs = require('fs') | ||
, url = require('url') | ||
, util = require('util') | ||
@@ -22,6 +21,67 @@ , EventEmitter = require('events').EventEmitter | ||
/**
 * Re-resolve the url-bearing values of an already-parsed node against a
 * base url. The node is modified in place.
 *
 * For every own property of a level:
 *   - arrays are walked element by element (recursively);
 *   - plain objects named `logo` or `icon` have their `#` text resolved;
 *   - any other plain object has its own `href`, `src` and `uri` values
 *     resolved.
 * Only arrays are recursed into; plain-object children are inspected one
 * level deep, as before.
 *
 * Null/undefined values and non-string url values are skipped instead of
 * throwing, so a sparse or partially built node cannot abort the parse.
 *
 * @param {Object} node - the node to fix up
 * @param {String} baseurl - the url to resolve against
 * @return {Object|Boolean} the same node, or `false` when either argument
 *   is missing and there is nothing to do
 */
function reresolve (node, baseurl) {
  if (!node || !baseurl) {
    return false; // Nothing to do.
  }

  // Same test the original used (constructor.name === 'Object'), but safe
  // for null, undefined and prototype-less objects.
  function isPlainObject (value) {
    return !!value && !!value.constructor && value.constructor.name === 'Object';
  }

  // Resolve holder[key] in place, but only when there is a string to resolve.
  function resolveIfString (holder, key) {
    if (typeof holder[key] === 'string') {
      holder[key] = utils.resolve(baseurl, holder[key]);
    }
  }

  function resolveLevel (level) {
    if (!level || typeof level !== 'object') {
      return level; // Array elements may be null or plain text.
    }
    Object.keys(level).forEach(function (el) {
      var value = level[el];
      if (Array.isArray(value)) {
        value.forEach(resolveLevel);
      } else if (isPlainObject(value)) {
        if (el === 'logo' || el === 'icon') {
          resolveIfString(value, '#');
        } else {
          ['href', 'src', 'uri'].forEach(function (name) {
            if (Object.prototype.hasOwnProperty.call(value, name)) {
              resolveIfString(value, name);
            }
          });
        }
      }
    });
    return level;
  }

  return resolveLevel(node);
}
/**
 * Normalise the attributes of an element as it is opened. Must be invoked
 * with the parser as `this` (e.g. `handleAttributes.call(parser, ...)`).
 *
 * - An `xml:base` attribute is handled first: it is trimmed, resolved
 *   against the current base (if any) and pushed onto `parser.xmlbase`.
 *   It then becomes the base for the element's own `href`/`src`/`uri`
 *   attributes, independent of attribute order. Previously those
 *   attributes were resolved against the parent's base only.
 * - `href`, `src` and `uri` are resolved against the effective base.
 * - `type="xhtml"` switches the parser into xhtml capture mode.
 * - Every attribute value is trimmed.
 *
 * @param {Object} attrs - attribute name/value map; modified in place
 * @param {String} el - name of the element carrying the attributes
 * @return {Object} the same `attrs` object
 */
function handleAttributes (attrs, el) {
  var parser = this
    , basepath = ''
    , base
    ;

  if (parser.xmlbase && parser.xmlbase.length) {
    basepath = parser.xmlbase[0]['#'];
  }

  if (Object.prototype.hasOwnProperty.call(attrs, 'xml:base')) {
    // Trim before storing so the base stack never holds padded urls.
    base = attrs['xml:base'].trim();
    if (basepath) {
      base = utils.resolve(basepath, base);
    }
    parser.xmlbase.unshift({ '#name': el, '#': base });
    attrs['xml:base'] = base;
    basepath = base; // applies to this element's own attributes too
  }

  Object.keys(attrs).forEach(function (name) {
    var raw, value;
    if (name === 'xml:base') {
      return; // already handled above
    }
    raw = attrs[name];
    value = raw.trim();
    if (basepath && (name === 'href' || name === 'src' || name === 'uri')) {
      // Apply xml:base to these attributes as they appear
      // rather than leaving it to the ultimate parser
      value = utils.resolve(basepath, value);
    } else if (name === 'type' && raw === 'xhtml') {
      // Compared untrimmed, exactly as before.
      parser.in_xhtml = true;
      parser.xhtml = { '#name': el, '#': '' };
    }
    attrs[name] = value.trim();
  });

  return attrs;
}
function handleMeta (node, type, options) { | ||
if (!type || !node) return {}; | ||
var meta = {} | ||
var parser = this | ||
, meta = {} | ||
, normalize = !options || (options && options.normalize) | ||
@@ -70,3 +130,9 @@ ; | ||
if (link['@']['rel'] == 'alternate') meta.link = link['@']['href']; | ||
else if (link['@']['rel'] == 'self') meta.xmlurl = meta.xmlUrl = link['@']['href']; | ||
else if (link['@']['rel'] == 'self') { | ||
meta.xmlurl = meta.xmlUrl = link['@']['href']; | ||
if (parser.xmlbase && parser.xmlbase.length === 0) { | ||
parser.xmlbase.unshift({ '#name': 'xml', '#': meta.xmlurl}); | ||
parser.stack[0] = reresolve(parser.stack[0], meta.xmlurl); | ||
} | ||
} | ||
} else { | ||
@@ -83,3 +149,9 @@ meta.link = link['@']['href']; | ||
if (el['@']['rel'] == 'alternate') meta.link = el['@']['href']; | ||
else if (el['@']['rel'] == 'self') meta.xmlurl = meta.xmlUrl = el['@']['href']; | ||
else if (el['@']['rel'] == 'self') { | ||
meta.xmlurl = meta.xmlUrl = el['@']['href']; | ||
if (parser.xmlbase && parser.xmlbase.length === 0) { | ||
parser.xmlbase.unshift({ '#name': 'xml', '#': meta.xmlurl}); | ||
parser.stack[0] = reresolve(parser.stack[0], meta.xmlurl); | ||
} | ||
} | ||
} else { | ||
@@ -492,6 +564,7 @@ meta.link = el['@']['href']; | ||
var parser = this; | ||
parser._reset(); | ||
parser.options = options || {}; | ||
if (!('normalize' in parser.options)) parser.options.normalize = true; | ||
if (!('addMetaToItems' in parser.options)) parser.options.addMetaToItems = true; | ||
parser._reset(); | ||
if (!('addmeta' in parser.options)) parser.options.addmeta = true; | ||
if (parser.options.feedurl) parser.xmlbase.unshift({ '#name': 'xml', '#': parser.options.feedurl}); | ||
parser.stream = sax.createStream(false /* strict mode - no */, {lowercase: true}); // https://github.com/isaacs/sax-js | ||
@@ -555,4 +628,13 @@ parser.stream.on('error', function (e){ parser.handleSaxError(e, parser); }); | ||
FeedParser.prototype.parseString = function(string, callback) { | ||
FeedParser.prototype.parseString = function(string, options, callback) { | ||
var parser = this; | ||
if (arguments.length === 2 && typeof options === 'function') { | ||
callback = options; | ||
options = null; | ||
} | ||
if (options) { | ||
if ('normalize' in options) parser.options.normalize = options.normalize; | ||
if ('addmeta' in options) parser.options.addmeta = options.addmeta; | ||
if (options.feedurl) parser.xmlbase.unshift({ '#name': 'xml', '#': options.feedurl}); | ||
} | ||
parser._setCallback(callback); | ||
@@ -572,4 +654,13 @@ parser.stream | ||
FeedParser.prototype.parseFile = function(file, callback) { | ||
FeedParser.prototype.parseFile = function(file, options, callback) { | ||
var parser = this; | ||
if (arguments.length === 2 && typeof options === 'function') { | ||
callback = options; | ||
options = null; | ||
} | ||
if (options) { | ||
if ('normalize' in options) parser.options.normalize = options.normalize; | ||
if ('addmeta' in options) parser.options.addmeta = options.addmeta; | ||
if (options.feedurl) parser.xmlbase.unshift({ '#name': 'xml', '#': options.feedurl}); | ||
} | ||
if (/^https?:/.test(file) || (typeof file == 'object' && 'protocol' in file)) { | ||
@@ -598,4 +689,19 @@ parser.parseUrl(file, callback); | ||
FeedParser.prototype.parseUrl = function(url, callback) { | ||
FeedParser.prototype.parseUrl = function(url, options, callback) { | ||
var parser = this; | ||
if (arguments.length === 2 && typeof options === 'function') { | ||
callback = options; | ||
options = null; | ||
} | ||
if (options) { | ||
if ('normalize' in options) parser.options.normalize = options.normalize; | ||
if ('addmeta' in options) parser.options.addmeta = options.addmeta; | ||
} | ||
if (!parser.xmlbase.length) { // .parseFile may have already populated this value | ||
if (/^https?:/.test(url)) { | ||
parser.xmlbase.unshift({ '#name': 'xml', '#': url}); | ||
} else if (typeof url == 'object' && 'href' in url) { | ||
parser.xmlbase.unshift({ '#name': 'xml', '#': url.href}); | ||
} | ||
} | ||
parser._setCallback(callback); | ||
@@ -623,4 +729,13 @@ request(url) | ||
FeedParser.prototype.parseStream = function(stream, callback) { | ||
FeedParser.prototype.parseStream = function(stream, options, callback) { | ||
var parser = this; | ||
if (arguments.length === 2 && typeof options === 'function') { | ||
callback = options; | ||
options = null; | ||
} | ||
if (options) { | ||
if ('normalize' in options) parser.options.normalize = options.normalize; | ||
if ('addmeta' in options) parser.options.addmeta = options.addmeta; | ||
if (options.feedurl) parser.xmlbase.unshift({ '#name': 'xml', '#': options.feedurl}); | ||
} | ||
parser._setCallback(callback); | ||
@@ -679,24 +794,4 @@ stream | ||
function handleAttributes (attrs, el) { | ||
Object.keys(attrs).forEach(function(name){ | ||
if (parser.xmlbase.length && (name == 'href' || name == 'src' || name == 'uri')) { | ||
// Apply xml:base to these elements as they appear | ||
// rather than leaving it to the ultimate parser | ||
attrs[name] = url.resolve(parser.xmlbase[0]['#'], attrs[name]); | ||
} else if (name == 'xml:base') { | ||
if (parser.xmlbase.length) { | ||
attrs[name] = url.resolve(parser.xmlbase[0]['#'], attrs[name]); | ||
} | ||
parser.xmlbase.unshift({ '#name': el, '#': attrs[name]}); | ||
} else if (name == 'type' && attrs['type'] == 'xhtml') { | ||
parser.in_xhtml = true; | ||
parser.xhtml = {'#name': el, '#': ''}; | ||
} | ||
attrs[name] = attrs[name].trim(); | ||
}); | ||
return attrs; | ||
} | ||
if (Object.keys(node.attributes).length) { | ||
n['@'] = handleAttributes(node.attributes, n['#name']); | ||
n['@'] = handleAttributes.call(parser, node.attributes, n['#name']); | ||
} | ||
@@ -743,11 +838,17 @@ | ||
FeedParser.prototype.handleCloseTag = function (el, scope){ | ||
var parser = scope; | ||
var item; | ||
var parser = scope | ||
, item | ||
, baseurl | ||
; | ||
var n = parser.stack.shift(); | ||
delete n['#name']; | ||
if (parser.xmlbase.length && (el == 'logo' || el == 'icon')) { // Via atom | ||
if (parser.xmlbase && parser.xmlbase.length) { | ||
baseurl = parser.xmlbase[0]['#']; | ||
} | ||
if (baseurl && (el == 'logo' || el == 'icon')) { // Via atom | ||
// Apply xml:base to these elements as they appear | ||
// rather than leaving it to the ultimate parser | ||
n['#'] = url.resolve(parser.xmlbase[0]['#'], n['#']); | ||
n['#'] = utils.resolve(baseurl, n['#']); | ||
} | ||
@@ -790,7 +891,10 @@ | ||
if (!parser.meta.title) { // We haven't yet parsed all the metadata | ||
utils.merge(parser.meta, handleMeta(parser.stack[0], parser.meta['#type'], parser.options)); | ||
utils.merge(parser.meta, handleMeta.call(parser, parser.stack[0], parser.meta['#type'], parser.options)); | ||
parser.emit('meta', parser.meta); | ||
} | ||
if (!baseurl && parser.xmlbase && parser.xmlbase.length) { // handleMeta was able to infer a baseurl without xml:base or options.feedurl | ||
n = reresolve(n, parser.xmlbase[0]['#']); | ||
} | ||
item = handleItem(n, parser.meta['#type'], parser.options); | ||
if (parser.options.addMetaToItems) { | ||
if (parser.options.addmeta) { | ||
item.meta = parser.meta; | ||
@@ -797,0 +901,0 @@ } |
{ "name" : "feedparser" | ||
, "author" : "Dan MacTough <danmactough@gmail.com>" | ||
, "description" : "Robust RSS Atom and RDF feed parsing using sax js" | ||
, "version": "0.9.13" | ||
, "version": "0.9.14" | ||
, "keywords" : ["rss", "feed", "atom", "rdf", "xml", "syndication"] | ||
@@ -20,3 +20,5 @@ , "homepage" : "http://github.com/danmactough/node-feedparser" | ||
} | ||
, "devDependencies": {} | ||
, "devDependencies": | ||
{ "mocha" : "1.3.x" | ||
} | ||
} |
@@ -15,2 +15,54 @@ # Feedparser - Robust RSS, Atom, and RDF feed parsing in Node.js | ||
## Changes since v0.9.13 | ||
The parser constructor and each of the parse methods now accept an
optional [options object](#options).
## Usage | ||
### Create a new instance | ||
```javascript | ||
var FeedParser = require('feedparser') | ||
, parser = new FeedParser() // optionally called with an options object | ||
; | ||
``` | ||
### parser.parseString(string, [options], [callback]) | ||
- `string` - the contents of the feed | ||
### parser.parseFile(filename, [options], [callback]) | ||
- `filename` - a local filename or remote url | ||
### parser.parseUrl(url, [options], [callback]) | ||
- `url` - fully qualified uri or a parsed url object from url.parse() | ||
### parser.parseStream(readableStream, [options], [callback]) | ||
- `readableStream` - a [Readable Stream](http://nodejs.org/api/stream.html#stream_readable_stream) | ||
### options | ||
- `normalize` - Set to `false` to override Feedparser's default behavior, | ||
which is to parse feeds into an object that contains the generic properties | ||
patterned after (although not identical to) the RSS 2.0 format, regardless | ||
of the feed's format. | ||
- `addmeta` - Set to `false` to override Feedparser's default behavior, which | ||
is to add the feed's `meta` information to each `article`. | ||
- `feedurl` - The url (string) of the feed. FeedParser is very good at | ||
resolving relative urls in feeds. But some feeds use relative urls without | ||
declaring the `xml:base` attribute any place in the feed. This is perfectly | ||
valid, but if we are parsing the feed with the `parseString`, `parseFile`, | ||
or `parseStream` method, we don't know the feed's url before we start
parsing the feed and trying to resolve those relative urls. If we discover | ||
the feed's url, we will go back and resolve the relative urls we've already | ||
seen, but this takes a little time (not much). If you want to be sure we | ||
never have to re-resolve relative urls (or if FeedParser is failing to | ||
properly resolve relative urls), you should set `feedurl`. | ||
## Examples | ||
@@ -27,2 +79,3 @@ | ||
``` | ||
### Use as an EventEmitter | ||
@@ -49,4 +102,4 @@ | ||
var reqObj = {'uri': 'http://cyber.law.harvard.edu/rss/examples/rss2sample.xml', | ||
'If-Modified-Since' : <your cached 'lastModified' value>, | ||
'If-None-Match' : <your cached 'etag' value>}; | ||
'headers': {'If-Modified-Since' : <your cached 'lastModified' value>, | ||
'If-None-Match' : <your cached 'etag' value>}}; | ||
@@ -53,0 +106,0 @@ // parseString() |
18
utils.js
@@ -0,2 +1,9 @@ | ||
/** | ||
* Module dependencies. | ||
*/ | ||
var URL = require('url') | ||
; | ||
/** | ||
* Merge object b with object a. | ||
@@ -24,3 +31,3 @@ * | ||
} else { | ||
a[key] = b[key] | ||
a[key] = b[key]; | ||
} | ||
@@ -30,3 +37,3 @@ } | ||
return a; | ||
}; | ||
} | ||
exports.merge = merge; | ||
@@ -57,3 +64,3 @@ | ||
return a; | ||
}; | ||
} | ||
exports.unique = unique; | ||
@@ -89,1 +96,6 @@ | ||
exports.get = get; | ||
/**
 * Resolve a (possibly relative) url against a base url.
 *
 * When there is no base url, or the value to resolve is not a string
 * (e.g. an element that had no text), the value is returned unchanged
 * rather than letting `url.resolve` throw in the middle of a parse.
 *
 * @param {String} baseUrl - the url to resolve against
 * @param {String} pathUrl - the url to resolve
 * @return {String} the resolved url, or `pathUrl` as given when it cannot
 *   be resolved
 */
function resolve (baseUrl, pathUrl) {
  if (!baseUrl || typeof pathUrl !== 'string') {
    return pathUrl;
  }
  return URL.resolve(baseUrl, pathUrl);
}
exports.resolve = resolve; |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
104606
12
1130
240
1
5