htmlparser2
Advanced tools
Comparing version 2.2.3 to 2.2.4
@@ -63,3 +63,3 @@ var ElementType = require("./ElementType.js"); | ||
DomHandler.prototype.onopentag = function(name, attribs){ | ||
DomHandler.prototype.onopentagname = function(name){ | ||
var element = { | ||
@@ -69,7 +69,2 @@ type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag, | ||
}; | ||
//for some reason, an if doesn't work | ||
for(var i in attribs){ | ||
element.attribs = attribs; | ||
break; | ||
} | ||
this._addDomElement(element); | ||
@@ -79,2 +74,8 @@ this._tagStack.push(element); | ||
// Records a single attribute on the element currently at the top of the tag stack.
//   name  - attribute name, used as the key in element.attribs
//   value - attribute value stored under that key
// The `attribs` object is created lazily on the first attribute, so elements
// that never receive an attribute end up without an `attribs` property at all.
// NOTE(review): assumes this._tagStack is non-empty when this is called
// (presumably an element was pushed by the open-tag handler first) - confirm
// against the parser's event order; with an empty stack `element` is undefined.
DomHandler.prototype.onattribute = function(name, value){ | ||
var element = this._tagStack[this._tagStack.length-1]; | ||
if(!("attribs" in element)) element.attribs = {}; | ||
element.attribs[name] = value; | ||
}; | ||
DomHandler.prototype.ontext = function(data){ | ||
@@ -81,0 +82,0 @@ if(this._options.ignoreWhitespace && data.trim() === "") return; |
@@ -59,3 +59,3 @@ var DomHandler = require("./DomHandler.js"), | ||
feed.type = feedRoot.name; | ||
feed.type = feedRoot.name.substr(0, 3); | ||
feed.id = ""; | ||
@@ -62,0 +62,0 @@ if(tmp = fetch("title", childs)) feed.title = tmp; |
@@ -28,2 +28,6 @@ var defineProp = Object.defineProperty; | ||
}, | ||
// Lazy accessor for the ProxyHandler module: the first read requires
// "./ProxyHandler.js" and defines a plain value property named "ProxyHandler"
// on `this`, then returns that property.
// NOTE(review): presumably the value property replaces this getter so that
// later reads skip the accessor - this relies on the property being
// configurable on the enclosing object; confirm.
get ProxyHandler(){ | ||
defineProp(this, "ProxyHandler", {value:require("./ProxyHandler.js")}); | ||
return this.ProxyHandler; | ||
}, | ||
get DomUtils(){ | ||
@@ -30,0 +34,0 @@ defineProp(this, "DomUtils", {value:require("./DomUtils.js")}); |
@@ -18,3 +18,3 @@ var ElementType = require("./ElementType.js"); | ||
var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g, | ||
_reTail = /\s|\/|$/; | ||
_reTail = /\s|\/|$/; | ||
@@ -207,3 +207,3 @@ Parser.prototype._options = { | ||
} | ||
else this._processOpenTag(this._parseTagName(elementData), elementData); | ||
else this._processOpenTag(elementData); | ||
} | ||
@@ -259,2 +259,3 @@ else{ | ||
var emptyTags = { | ||
__proto__: null, | ||
area: true, | ||
@@ -288,3 +289,3 @@ base: true, | ||
else if(name === "br" && !this._options.xmlMode) | ||
this._processOpenTag(name, "/"); | ||
this._processOpenTag(name + "/"); | ||
}; | ||
@@ -307,4 +308,6 @@ | ||
Parser.prototype._processOpenTag = function(name, data){ | ||
var type = ElementType.Tag; | ||
Parser.prototype._processOpenTag = function(data){ | ||
var name = this._parseTagName(data), | ||
type = ElementType.Tag; | ||
if(this._options.xmlMode){ /*do nothing*/ } | ||
@@ -311,0 +314,0 @@ else if(name === "script") type = ElementType.Script; |
{ | ||
"name": "htmlparser2", | ||
"description": "Performance-optimized forgiving HTML/XML/RSS parser", | ||
"version": "2.2.3", | ||
"version": "2.2.4", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
@@ -6,0 +6,0 @@ "keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"], |
@@ -8,11 +8,2 @@ #htmlparser2 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser) | ||
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)? | ||
This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. | ||
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)). | ||
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be). | ||
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected. | ||
##Usage | ||
@@ -60,5 +51,24 @@ | ||
```javascript | ||
new htmlparser.FeedHandler(function (error, feed) { | ||
new htmlparser.FeedHandler(function(<error> error, <object> feed){ | ||
... | ||
}); | ||
``` | ||
``` | ||
##Performance | ||
Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)'s `bench.js`, I received the following results (on a late-2010 MacBook): | ||
* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s | ||
* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s | ||
* [node-expat](https://github.com/astro/node-expat): 103388 el/s | ||
* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s | ||
The test may be found in `tests/bench.js`. | ||
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)? | ||
This is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplication, and is remarkably faster than the original. | ||
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)). | ||
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose). | ||
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected. |
@@ -1,9 +0,8 @@ | ||
var fs = require("fs"); | ||
var fs = require("fs"), | ||
assert = require("assert"); | ||
var runCount = 0, | ||
testCount = 0, | ||
failCount = 0; | ||
testCount = 0; | ||
function runTests(test){ | ||
var begin = Date.now(); | ||
//read files, load them, run them | ||
@@ -13,7 +12,9 @@ fs.readdirSync(__dirname + test.dir | ||
if(file[0] === ".") return false; | ||
if(file.substr(-5) === ".json") return JSON.parse( | ||
fs.readFileSync(__dirname + test.dir + file) | ||
); | ||
return require(__dirname + test.dir + file); | ||
}).forEach(function(file){ | ||
if(file === false) return; | ||
var second = false, | ||
failed = false; | ||
if(!file) return; | ||
var second = false; | ||
@@ -25,17 +26,8 @@ runCount++; | ||
test.test(file, function(err, dom){ | ||
if(err) console.log("Handler error:", err); | ||
var expected = JSON.stringify(file.expected, null, 2), | ||
got = JSON.stringify(dom, null, 2); | ||
if(expected !== got){ | ||
failed = true; | ||
console.log("Expected", expected, "Got", got, second); | ||
} | ||
assert.ifError(err); | ||
assert.deepEqual(file.expected, dom, "didn't get expected output"); | ||
if(second){ | ||
runCount--; | ||
testCount++; | ||
if(failed) failCount++; | ||
console.log("["+file.name+"]:", failed ? "failed":"passed"); | ||
} | ||
@@ -45,3 +37,3 @@ else second = true; | ||
}); | ||
console.log("->", test.dir.slice(1, -1), "iterated"); | ||
console.log("->", test.dir.slice(1, -1), "started"); | ||
}; | ||
@@ -60,11 +52,4 @@ | ||
(function check(){ | ||
if(runCount !== 0){ | ||
return setTimeout(check, 50); | ||
} | ||
if(runCount !== 0) return process.nextTick(check); | ||
console.log("Total tests:", testCount); | ||
console.log("Failed tests:", failCount); | ||
if(failCount !== 0){ | ||
throw Error("Encountered " + failCount + " errors!"); | ||
} | ||
})(); |
//Runs tests for feeds | ||
var helper = require("./test-helper.js"), | ||
FeedHandler = require("../lib/FeedHandler.js"); | ||
FeedHandler = require("../lib/FeedHandler.js"), | ||
fs = require("fs"), | ||
parserOpts = { | ||
xmlMode: true | ||
}; | ||
@@ -12,4 +16,5 @@ exports.dir = "/Feeds/"; | ||
else cb(null, dom); | ||
}, test.options.handler); | ||
helper.writeToParser(handler, test.options.parser, test.html); | ||
}); | ||
var file = fs.readFileSync(__dirname + "/Documents/" + test.file).toString(); | ||
helper.writeToParser(handler, parserOpts, file); | ||
}; |
exports.name = "RSS (2.0)"; | ||
exports.options = { | ||
handler: {}, | ||
parser: { | ||
xmlMode: true | ||
} | ||
}; | ||
exports.type = "rss"; | ||
exports.html = require("fs").readFileSync(__dirname+"/../Documents/RSS_Example.xml").toString(); | ||
exports.file = "/RSS_Example.xml"; | ||
exports.expected = { | ||
@@ -11,0 +4,0 @@ type: "rss", |
exports.name = "Atom (1.0)"; | ||
exports.options = { | ||
handler: {}, | ||
parser: { | ||
xmlMode: true | ||
} | ||
}; | ||
exports.type = "rss"; | ||
exports.html = require("fs").readFileSync(__dirname+"/../Documents/Atom_Example.xml").toString(); | ||
exports.file = "/Atom_Example.xml"; | ||
exports.expected = { | ||
@@ -11,0 +4,0 @@ type: "atom", |
exports.name = "RDF test"; | ||
exports.options = { | ||
handler: {}, | ||
parser: { | ||
xmlMode: true | ||
} | ||
}; | ||
exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString(); | ||
exports.file = "/RDF_Example.xml"; | ||
exports.expected = { | ||
"type": "rdf:RDF", | ||
"type": "rdf", | ||
"id": "", | ||
@@ -14,0 +6,0 @@ "title": "craigslist | all community in SF bay area", |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
80630
62
3106
72
4
20