Socket
Socket
Sign inDemoInstall

feedparser

Package Overview
Dependencies
32
Maintainers
1
Versions
100
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.15.4 to 0.16.0

test/api.js

18

bin/dump.js

@@ -5,16 +5,16 @@ #!/usr/bin/env node

*
* Usage: node dump.js <feed url or filename>
* Usage: curl <feed url> | bin/dump.js
* cat <feed file> | bin/dump.js
*
*/
var util = require('util')
, feedparser = require('../')
, file = process.argv[2];
, FeedParser = require('../');
if (!file) {
process.exit(2);
}
feedparser.parseFile(file)
process.stdin.pipe(new FeedParser())
.on('error', console.error)
.on('complete', function(){
console.log(util.inspect(arguments, null, 10, true));
.on('readable', function() {
var stream = this, item;
while (item = stream.read()) {
console.log(util.inspect(item, null, 10, true));
}
});

@@ -9,3 +9,3 @@ /*!

, fs = require('fs')
, feed = '../test/feeds/rss2sample.xml';
, feed = __dirname+'/../test/feeds/rss2sample.xml';

@@ -20,4 +20,7 @@ fs.createReadStream(feed)

})
.on('article', function(article){
console.log('Got article: %s', article.title || article.description);
.on('readable', function() {
var stream = this, item;
while (item = stream.read()) {
console.log('Got article: %s', item.title || item.description);
}
});
v0.16.0 / 2013-06-11
==================
* Update README
* Remove legacy libxml-like helpers
* Update dump script
* Update examples
* Update tests
* Emit SAXErrors and allow consumer to handle or bail on SAXErrors
* Update copyright notices
* Merge branch 'AndreasMadsen-transform-stream'
* Change stream test to not require additional dependency
* make feedparser a transform stream
v0.15.4 / 2013-06-04

@@ -3,0 +17,0 @@ ==================

/**********************************************************************
node-feedparser - A robust RSS, Atom, RDF parser for node.
http://github.com/danmactough/node-feedparser
Copyright (c) 2011 Dan MacTough
Copyright (c) 2011, 2012, 2013 Dan MacTough and contributors
http://yabfog.com

@@ -13,18 +13,54 @@

var sax = require('sax')
, request = require('request')
, addressparser = require('addressparser')
, indexOfObject = require('array-indexofobject')
, resanitize = require('resanitize')
, fs = require('fs')
, URL = require('url')
, util = require('util')
, EventEmitter = require('events').EventEmitter
, Stream = require('stream').Stream
, STATUS_CODES = require('http').STATUS_CODES
, TransformStream = require('stream').Transform
, utils = require('./utils')
;
if (TransformStream === undefined) {
TransformStream = require('readable-stream').Transform;
}
/**
* FeedParser constructor. Most apps will only use one instance.
*
* Exposes a duplex (transform) stream to parse a feed.
*
* Each article/post in the feed will have the following keys:
* - title {String}
* - description {String}
* - summary {String}
* - date {Date} (or null)
* - pubdate {Date} (or null)
* - link {String}
* - origlink {String}
* - author {String}
* - guid {String}
* - comments {String}
* - image {Object}
* - categories {Array}
* - source {Object}
* - enclosures {Array}
* - meta {Object}
* - Object.keys(meta):
* - #ns {Array} key,value pairs of each namespace declared for the feed
* - #type {String} one of 'atom', 'rss', 'rdf'
* - #version {String}
* - title {String}
* - description {String}
* - date {Date} (or null)
* - pubdate {Date} (or null)
* - link {String} i.e., to the website, not the feed
* - xmlurl {String} the canonical URL of the feed, as declared by the feed
* - author {String}
* - language {String}
* - image {Object}
* - favicon {String}
* - copyright {String}
* - generator {String}
* - categories {Array}
*
* @param {Object} options

@@ -35,2 +71,6 @@ * @api public

if (!(this instanceof FeedParser)) return new FeedParser(options);
TransformStream.call(this, {
objectMode: true
});
this.init();

@@ -40,3 +80,3 @@ this.parseOpts(options);

this.stream = sax.createStream(this.options.strict /* strict mode - no by default */, {lowercase: true, xmlns: true });
this.stream.on('error', this.handleError.bind(this, this.handleSaxError.bind(this)));
this.stream.on('error', this.handleSaxError.bind(this));
this.stream.on('processinginstruction', this.handleProcessingInstruction.bind(this));

@@ -48,7 +88,4 @@ this.stream.on('opentag', this.handleOpenTag.bind(this));

this.stream.on('end', this.handleEnd.bind(this));
Stream.call(this);
this.writable = true;
this.readable = true;
}
util.inherits(FeedParser, Stream);
util.inherits(FeedParser, TransformStream);

@@ -66,3 +103,2 @@ /*

};
this.articles = [];
this.stack = [];

@@ -76,3 +112,2 @@ this.nodes = {};

this.errors = [];
this.silenceErrors = false;
};

@@ -88,2 +123,3 @@

if (!('addmeta' in this.options)) this.options.addmeta = true;
if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
if ('MAX_BUFFER_LENGTH' in this.options) {

@@ -100,32 +136,17 @@ sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers

// parsing a feed
if (!this.errors.length && this.meta && !this.meta['#type']) {
this.meta['#type'] = 'INVALID'; // Set a value so we don't cause an infinite loop
if (this.meta && !this.meta['#type']) {
var e = new Error('Not a feed');
if (this.response && this.response.request && this.response.request.href) {
e.url = this.response.request.href;
}
return this.handleError(e);
}
if ('function' === typeof this.callback) {
if (this.errors.length) {
var error = this.errors.pop();
if (this.errors.length) {
error.errors = this.errors;
}
this.callback(error);
} else {
this.callback(null, this.meta, this.articles);
}
this.push(null);
};
FeedParser.prototype.handleSaxError = function (e) {
this.emit('error', e);
if (this.options.resume_saxerror) {
this.resumeSaxError();
}
if (!this.errors.length) { this.emit('complete', this.meta, this.articles); }
this.emit('end');
if (this.stream) {
this.stream.removeAllListeners('end');
this.stream.removeAllListeners('error');
}
this.stream.on('error', function() {});
this.stream._parser.close();
};
FeedParser.prototype.handleSaxError = function (){
FeedParser.prototype.resumeSaxError = function () {
if (this.stream._parser) {

@@ -137,23 +158,4 @@ this.stream._parser.error = null;

FeedParser.prototype.handleError = function (next, e){
// A SaxError will prepend an error-handling callback,
// but other calls to #handleError will not
if (next && !e) {
e = next;
next = null;
}
// Only emit the error event if we are not using CPS or
// if we have a listener on 'error' even if we are using CPS
if (!this.silenceErrors && (!this.callback || this.listeners('error').length)) {
this.emit('error', e);
}
this.errors.push(e);
if (typeof next === 'function') {
next();
} else {
['processinginstruction', 'opentag', 'closetag', 'text', 'cdata', 'end'].forEach(function(ev){
this.stream && this.stream.removeAllListeners(ev);
}, this);
this.handleEnd();
}
FeedParser.prototype.handleError = function (e){
this.emit('error', e);
};

@@ -314,4 +316,3 @@

if (this.meta.author && !item.author) item.author = this.meta.author;
this.emit('article', item);
this.articles.push(item);
this.push(item);
} else if (!this.meta.title && // We haven't yet parsed all the metadata

@@ -1001,249 +1002,12 @@ (node['#name'] === 'channel' ||

// Naive Stream API
FeedParser.prototype.write = function (data) {
FeedParser.prototype._transform = function (data, encoding, done) {
this.stream.write(data);
return true;
done();
};
FeedParser.prototype.end = function (chunk) {
if (chunk && chunk.length) this.stream.write(chunk);
FeedParser.prototype._flush = function (done) {
this.stream.end();
return true;
done();
};
/**
 * Builds a FeedParser instance for the legacy callback-style helpers
 * (parseString, parseFile, parseStream, parseUrl).
 *
 * @param {Object|Function} options - parser options, or the callback when
 *   called with a single function argument
 * @param {Function} callback - stored on the instance as `fp.callback`;
 *   invoked later as callback(error, meta, articles)
 * @returns {FeedParser} the configured parser
 * @api private
 */
function feedparser (options, callback) {
  // Support the (callback)-only calling convention.
  if (typeof options === 'function') {
    callback = options;
    options = {};
  }
  var fp = new FeedParser(options);
  fp.callback = callback;
  return fp;
}
/**
* Parses a feed contained in a string.
*
* For each article/post in a feed, emits an 'article' event
* with an object with the following keys:
* title {String}
* description {String}
* summary {String}
* date {Date} (or null)
* pubdate {Date} (or null)
* link {String}
* origlink {String}
* author {String}
* guid {String}
* comments {String}
* image {Object}
* categories {Array}
* source {Object}
* enclosures {Array}
* meta {Object}
* Object.keys(meta):
* #ns {Array} key,value pairs of each namespace declared for the feed
* #type {String} one of 'atom', 'rss', 'rdf'
* #version {String}
* title {String}
* description {String}
* date {Date} (or null)
* pubdate {Date} (or null)
* link {String} i.e., to the website, not the feed
* xmlurl {String} the canonical URL of the feed, as declared by the feed
* author {String}
* language {String}
* image {Object}
* favicon {String}
* copyright {String}
* generator {String}
* categories {Array}
*
* Emits a 'warning' event on each XML parser warning
*
* Emits an 'error' event on each XML parser error
*
* @param {String} string of XML representing the feed
* @param {Object} options
* @param {Function} callback
* @api public
*/
/**
 * Parses a feed contained in a string or Buffer.
 *
 * @param {String|Buffer} string - the raw feed XML
 * @param {Object} options
 * @param {Function} callback
 * @returns {FeedParser}
 * @api public
 */
FeedParser.parseString = function (string, options, callback) {
  var fp = feedparser(options, callback);
  // Defer one tick so the caller has a chance to attach event
  // handlers before any parse events fire.
  process.nextTick(function () {
    // Buffers are written as-is; strings are treated as UTF-8.
    var encoding = Buffer.isBuffer(string) ? null : 'utf8';
    fp.stream.on('error', fp.handleError.bind(fp));
    fp.stream.end(string, encoding);
  });
  return fp;
};
/**
* Parses a feed from a file or (for compatibility with libxml) a url.
* See parseString for more info.
*
* @param {String} path to the feed file or a fully qualified uri or parsed url object from url.parse()
* @param {Object} options
* @param {Function} callback
* @api public
*/
/**
 * Parses a feed from a local file, or delegates to parseUrl when given
 * an http(s) string or a url-like object.
 *
 * @param {String|Object} file - path, fully qualified uri, or parsed url object
 * @param {Object} options
 * @param {Function} callback
 * @returns {FeedParser}
 * @api public
 */
FeedParser.parseFile = function (file, options, callback) {
  // Anything that looks like a url (http(s) string, or an object carrying
  // href/uri/url) is fetched remotely instead of read from disk.
  var looksLikeUrl = /^https?:/.test(file) ||
    (typeof file === 'object' && ('href' in file || 'uri' in file || 'url' in file));
  if (looksLikeUrl) {
    return FeedParser.parseUrl(file, options, callback);
  }
  var fp = feedparser(options, callback);
  var source = fs.createReadStream(file);
  source.on('error', fp.handleError.bind(fp));
  source.pipe(fp.stream);
  return fp;
};
/**
* Parses a feed from a Stream.
*
* Example:
* fp = new FeedParser();
* fp.on('article', function (article){ // do something });
* fp.parseStream(fs.createReadStream('file.xml')[, callback]);
*
*
* See parseString for more info.
*
* @param {Readable Stream}
* @param {Object} options
* @param {Function} callback
* @api public
*/
/**
 * Parses a feed from a readable stream.
 *
 * @param {Stream} stream - a readable stream supplying the feed XML
 * @param {Object} options
 * @param {Function} callback
 * @returns {FeedParser}
 * @api public
 */
FeedParser.parseStream = function (stream, options, callback) {
  var fp = feedparser(options, callback);
  // A missing stream is tolerated: the parser is returned unattached,
  // matching the original short-circuit behavior.
  if (stream) {
    stream.on('error', fp.handleError.bind(fp));
    stream.pipe(fp.stream);
  }
  return fp;
};
/**
* Parses a feed from a url.
*
* Please consider whether it would be better to perform conditional GETs
* and pass in the results instead.
*
* See parseString for more info.
*
* @param {String|Object} fully qualified uri, parsed url object from url.parse(),
* or a Request object with uri|url and headers
* @param {Object} options
* @param {Function} callback
* @api public
*/
// Fetches a feed over HTTP(S) via `request` and pipes the response body
// into the parser. Normalizes several accepted url shapes (string,
// parsed-url object, request-options object) before issuing the request.
FeedParser.parseUrl = function (url, options, callback) {
  var fp = feedparser(options, callback);
  // Invoked on the HTTP response before any body data is parsed.
  var handleResponse = function (response) {
    fp.response = response;
    fp.emit('response', response);
    var code = response.statusCode;
    var codeReason = STATUS_CODES[code] || 'Unknown Failure';
    var contentType = response.headers && response.headers['content-type'] || '';
    var e = new Error();
    if (code !== 200) {
      if (code === 304) {
        // Not Modified: emit '304', drop results and suppress the usual
        // completion events, then end the parse quietly.
        fp.emit('304');
        fp.meta = fp.articles = null;
        fp.silenceErrors = true;
        fp.removeAllListeners('complete');
        fp.removeAllListeners('meta');
        fp.removeAllListeners('article');
        fp.handleEnd();
      }
      else {
        // Any other non-200 status is surfaced as an error and the
        // request is aborted so no body is parsed.
        e.message = 'Remote server responded: ' + codeReason;
        e.code = code;
        e.url = url;
        fp.handleError(e);
        response.request && response.request.abort();
      }
      return;
    }
    // 200 OK: record the Content-Type header (media type plus any
    // parameters, e.g. charset) on the feed meta.
    (function () {
      var parts = contentType.split(/; ?/);
      var mediatype = parts[0]
        , mediatype_parts = mediatype.split('/')
        , parameters = parts.length ? parts.slice(1) : [];
      fp.meta['#content-type'] = parameters.reduce(function (map, param) {
        var pair = param.split('=');
        map['@'][pair[0]] = pair[1];
        return map;
      },{
        '#': contentType,
        '@': {
          'media-type': mediatype,
          'type': mediatype_parts[0],
          'subtype': mediatype_parts[1],
        }
      });
    })();
    return;
  };
  // Make sure we have a url and normalize the request object
  var invalid = 'Invalid URL: must be a string or valid request object - %s';
  if (/^https?:/.test(url)) {
    // Plain http(s) string -> request options object.
    url = {
      uri: url
    };
  } else if (url && typeof url === 'object') {
    if ('href' in url) { // parsed url
      if (!/^https?:/.test(URL.format(url))) {
        throw (new Error(util.format(invalid, url)));
      }
      url = {
        url: url
      };
    } else {
      // Request-options object: reconcile the `url`/`uri` aliases so that
      // a string ends up in `uri` and a parsed-url object in `url`.
      if (url.url && url.uri) delete url.uri; // wtf?!
      if (! (url.url || url.uri) ) throw (new Error(util.format(invalid, url)));
      if (url.url) {
        if (/^https?:/.test(url.url)) {
          url.uri = url.url;
          delete url.url;
        } else if ( !(typeof url.url === 'object' && 'href' in url.url && /^https?:/.test(URL.format(url.url))) ) {
          // not a string, not a parsed url
          throw (new Error(util.format(invalid, url.url)));
        }
      }
      if (url.uri) {
        if ( typeof url.uri === 'object' && 'href' in url.uri && /^https?:/.test(URL.format(url.uri)) ) {
          url.url = url.uri;
          delete url.uri;
        } else if (!/^https?:/.test(url.uri)) {
          // not a string, not a parsed url
          throw (new Error(util.format(invalid, url.uri)));
        }
      }
    }
  } else {
    throw (new Error(util.format(invalid, url)));
  }
  url.headers = url.headers || {};
  // Disable compressed transfer so the sax stream receives raw XML.
  url.headers['Accept-Encoding'] = 'identity';
  // Seed xml:base with the request url unless one is already set
  // (presumably via the `feedurl` option -- TODO confirm).
  if (!fp.xmlbase.length) {
    if (url.uri) {
      fp.xmlbase.unshift({ '#name': 'xml', '#': url.uri });
    } else if (url.url) {
      fp.xmlbase.unshift({ '#name': 'xml', '#': URL.format(url.url) });
    }
  }
  request(url)
    .on('error', fp.handleError.bind(fp))
    .on('response', handleResponse)
    .pipe(fp.stream)
    ;
  return fp;
};
exports = module.exports = FeedParser;

@@ -5,3 +5,3 @@ {

"description": "Robust RSS Atom and RDF feed parsing using sax js",
"version": "0.15.4",
"version": "0.16.0",
"keywords": [

@@ -34,2 +34,3 @@ "rss",

"array-indexofobject": "0.0.1",
"readable-stream": "1.0.x",
"resanitize": "~0.1.10"

@@ -36,0 +37,0 @@ },

@@ -16,6 +16,6 @@ [![Build Status](https://secure.travis-ci.org/danmactough/node-feedparser.png?branch=master)](https://travis-ci.org/danmactough/node-feedparser)

- [sax](https://github.com/isaacs/sax-js)
- [request](https://github.com/mikeal/request)
- [addressparser](https://github.com/andris9/addressparser)
- [resanitize](https://github.com/danmactough/node-resanitize)
- [array-indexofobject](https://github.com/danmactough/node-array-indexofobject)
- [readable-stream](https://github.com/isaacs/readable-stream) (only if using Node <= v0.8.x)

@@ -28,11 +28,26 @@ ## Installation

## Changes since v0.13.x
## Changes since v0.15.x
- The ability to handle `.pipe()` is back. The libxml-like helper methods will
probably be going away in the next minor (or major) version release -- as soon
as I'm sure the stream API is stable and compatible with Node v0.10.x.
- The libxml-like helper methods have been removed. There is now just one input
interface: the stream interface.
- Events:
- `304`, `response` - removed, as Feedparser no longer fetches urls
- `article`, `complete` - removed; use the stream interface
- `data` - all readable streams will emit a `data` event, but this puts the
stream into "old" v0.8-style push streams
- `end` - stream behavior dictates that the `end` event will never fire if
you don't read any data from the stream; you can kick the Feedparser stream
to work like an "old" v0.8-style push stream (and get the old `end` event
behavior) by calling `.resume()`.
- `SAXErrors` are emitted as `error` events. By default, they are automatically
resumed. Pass `{ resume_saxerror: false }` as an option if you want to manually
handle `SAXErrors` (abort parsing, perhaps).
## Usage
The easiest way to use feedparser is to just give it a [readable stream](http://nodejs.org/api/stream.html#stream_readable_stream).
The easiest way to use feedparser is to just give it a [readable stream](http://nodejs.org/api/stream.html#stream_readable_stream).
It will then return a readable object stream.

@@ -52,10 +67,19 @@ ```js

})
.on('article', function (article) {
// do something else
.on('readable', function () {
// do something else, then do the next thing
})
.on('end', function () {
// do the next thing
});
```
Or:
```js
var FeedParser = require('feedparser')
, request = require('request');
request('http://somefeedurl.xml')
.pipe(new FeedParser([options]))
.pipe([some other stream])
```
### options

@@ -69,3 +93,3 @@

- `addmeta` - Set to `false` to override Feedparser's default behavior, which
is to add the feed's `meta` information to each `article`.
is to add the feed's `meta` information to each article.

@@ -82,25 +106,8 @@ - `feedurl` - The url (string) of the feed. FeedParser is very good at

## libxml-like Helper Methods (deprecated)
- `resume_saxerror` - Set to `false` to override Feedparser's default behavior, which
is to emit any `SAXError` on `error` and then automatically resume parsing. In
my experience, `SAXErrors` are not usually fatal, so this is usually helpful
behavior. If you want total control over handling these errors and optionally
aborting parsing the feed, use this option.
### parser.parseString(string, [options], [callback])
- `string` - the contents of the feed
### parser.parseFile(filename, [options], [callback])
- `filename` - a local filename or remote url
### parser.parseUrl(url, [options], [callback])
The first argument can be either a url or a `request` options object. The only
required option is uri, all others are optional. See
[request](https://github.com/mikeal/request#requestoptions-callback) for details
about what that `request` options object might look like.
- `url` - fully qualified uri or a parsed url object from url.parse()
### parser.parseStream(readableStream, [options], [callback])
- `readableStream` - a [Readable Stream](http://nodejs.org/api/stream.html#stream_readable_stream)
## Examples

@@ -112,22 +119,17 @@

### Events Emitted
### Transform Stream
* `error` - called with `error` whenever there is a an error of any kind (SAXError, Feedparser error, request error, etc.)
* `meta` - called with `meta` when it has been parsed
* `article` - called with a single `article` when each article has been parsed
* `complete` - called with `meta` and `articles` when parsing is complete
* `end` - called with no parameters when parsing is complete or aborted (e.g., due to error)
* `response` - called with the HTTP `response` only when a url has been fetched via parseUrl or parseFile
* `304` - called with no parameters when a url has been fetched with a conditional GET via parseUrl or parseFile and the remote server responds with '304 Not Modified'
Feedparser is a [transform stream](http://nodejs.org/api/stream.html#stream_class_stream_transform) operating in "object mode": XML in -> Javascript objects out.
Each readable chunk is an object representing an article in the feed.
### callback(error, meta, articles)
### Events Emitted
You can provide a callback (i.e., via the libxml-like helper methods) to be
called when the feed is finished being parsed. Because the helper methods are
deprecated, you should consider the ability to provide a callback deprecated,
as well. Use events instead.
* `meta` - called with feed `meta` when it has been parsed
* `error` - called with `error` whenever there is a Feedparser error of any kind (SAXError, Feedparser error, etc.)
## What is the parsed output produced by feedparser?
Feedparser parses each feed into a `meta` portion and one or more `articles`.
Feedparser parses each feed into a `meta` (emitted on the `meta` event) portion
and one or more `articles` (emitted on the `data` event or readable after the `readable`
event is emitted).

@@ -216,3 +218,3 @@ Regardless of the format of the feed, the `meta` and each `article` contain a

Copyright (c) 2011-2013 Dan MacTough &lt;danmactough@gmail.com&gt;
Copyright (c) 2011, 2012, 2013 Dan MacTough and contributors

@@ -219,0 +221,0 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of

@@ -1,26 +0,21 @@

describe('feedparser', function(){
describe('categories', function(){
var feed = __dirname + '/feeds/category-feed.xml'
, meta = {}
, articles = {}
;
var feed = __dirname + '/feeds/category-feed.xml';
describe('categories with comma in them', function(){
before(function(done){
FeedParser.parseFile(feed, function (error, _meta, _articles) {
assert.ifError(error);
meta = _meta;
articles = _articles;
done();
});
});
describe('article', function(){
it('should should not seperate by comma', function() {
assert.deepEqual(articles[0].categories, [
it('should not seperate by comma', function (done) {
fs.createReadStream(feed).pipe(new FeedParser())
.once('readable', function () {
var stream = this;
assert.deepEqual(stream.read().categories, [
'Water Pollution',
'Gowanus Canal (Brooklyn, NY)'
]);
done();
})
.on('error', function (err) {
assert.ifError(err);
done(err);
});
});
});
});

@@ -5,7 +5,7 @@ /*global assert:true, FeedParser:true, server:true*/

assert = require('assert');
var fs = require('fs')
, path = require('path')
var path = require('path')
, zlib = require('zlib')
, gzip = zlib.createGzip();
fs = require('fs');
FeedParser = require('../');

@@ -12,0 +12,0 @@ server = function (done) {

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc