Socket
Socket
Sign in · Demo · Install

htmlparser2

Package Overview
Dependencies
0
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 2.2.3 to 2.2.4

lib/ProxyHandler.js

13

lib/DomHandler.js

@@ -63,3 +63,3 @@ var ElementType = require("./ElementType.js");

DomHandler.prototype.onopentag = function(name, attribs){
DomHandler.prototype.onopentagname = function(name){
var element = {

@@ -69,7 +69,2 @@ type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag,

};
//for some reason, an if doesn't work
for(var i in attribs){
element.attribs = attribs;
break;
}
this._addDomElement(element);

@@ -79,2 +74,8 @@ this._tagStack.push(element);

//stores a single attribute on the element that sits on top of the tag stack
DomHandler.prototype.onattribute = function(name, value){
	var stack = this._tagStack,
	    current = stack[stack.length - 1];

	//the attribs object is only created once an attribute actually arrives
	if(!("attribs" in current)){
		current.attribs = {};
	}
	current.attribs[name] = value;
};
DomHandler.prototype.ontext = function(data){

@@ -81,0 +82,0 @@ if(this._options.ignoreWhitespace && data.trim() === "") return;

@@ -59,3 +59,3 @@ var DomHandler = require("./DomHandler.js"),

feed.type = feedRoot.name;
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";

@@ -62,0 +62,0 @@ if(tmp = fetch("title", childs)) feed.title = tmp;

@@ -28,2 +28,6 @@ var defineProp = Object.defineProperty;

},
//lazy getter: the first access requires ./ProxyHandler.js and defines it on `this`
//as a plain value property named "ProxyHandler", then returns that property
get ProxyHandler(){
defineProp(this, "ProxyHandler", {value:require("./ProxyHandler.js")});
return this.ProxyHandler;
},
get DomUtils(){

@@ -30,0 +34,0 @@ defineProp(this, "DomUtils", {value:require("./DomUtils.js")});

@@ -18,3 +18,3 @@ var ElementType = require("./ElementType.js");

var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
_reTail = /\s|\/|$/;
_reTail = /\s|\/|$/;

@@ -207,3 +207,3 @@ Parser.prototype._options = {

}
else this._processOpenTag(this._parseTagName(elementData), elementData);
else this._processOpenTag(elementData);
}

@@ -259,2 +259,3 @@ else{

var emptyTags = {
__proto__: null,
area: true,

@@ -288,3 +289,3 @@ base: true,

else if(name === "br" && !this._options.xmlMode)
this._processOpenTag(name, "/");
this._processOpenTag(name + "/");
};

@@ -307,4 +308,6 @@

Parser.prototype._processOpenTag = function(name, data){
var type = ElementType.Tag;
Parser.prototype._processOpenTag = function(data){
var name = this._parseTagName(data),
type = ElementType.Tag;
if(this._options.xmlMode){ /*do nothing*/ }

@@ -311,0 +314,0 @@ else if(name === "script") type = ElementType.Script;

{
"name": "htmlparser2",
"description": "Performance-optimized forgiving HTML/XML/RSS parser",
"version": "2.2.3",
"version": "2.2.4",
"author": "Felix Boehm <me@feedic.com>",

@@ -6,0 +6,0 @@ "keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],

@@ -8,11 +8,2 @@ #htmlparser2 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser)

##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplication, and is remarkably faster than the original.
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (e.g. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be).
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
##Usage

@@ -60,5 +51,24 @@

```javascript
new htmlparser.FeedHandler(function (error, feed) {
new htmlparser.FeedHandler(function(<error> error, <object> feed){
...
});
```
```
##Performance
Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)'s `bench.js`, I received the following results (on a late-2010 MacBook):
* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s
* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s
* [node-expat](https://github.com/astro/node-expat): 103388 el/s
* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s
The test may be found in `tests/bench.js`.
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplication, and is remarkably faster than the original.
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (e.g. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose).
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.

@@ -1,9 +0,8 @@

var fs = require("fs");
var fs = require("fs"),
assert = require("assert");
var runCount = 0,
testCount = 0,
failCount = 0;
testCount = 0;
function runTests(test){
var begin = Date.now();
//read files, load them, run them

@@ -13,7 +12,9 @@ fs.readdirSync(__dirname + test.dir

if(file[0] === ".") return false;
if(file.substr(-5) === ".json") return JSON.parse(
fs.readFileSync(__dirname + test.dir + file)
);
return require(__dirname + test.dir + file);
}).forEach(function(file){
if(file === false) return;
var second = false,
failed = false;
if(!file) return;
var second = false;

@@ -25,17 +26,8 @@ runCount++;

test.test(file, function(err, dom){
if(err) console.log("Handler error:", err);
var expected = JSON.stringify(file.expected, null, 2),
got = JSON.stringify(dom, null, 2);
if(expected !== got){
failed = true;
console.log("Expected", expected, "Got", got, second);
}
assert.ifError(err);
assert.deepEqual(file.expected, dom, "didn't get expected output");
if(second){
runCount--;
testCount++;
if(failed) failCount++;
console.log("["+file.name+"]:", failed ? "failed":"passed");
}

@@ -45,3 +37,3 @@ else second = true;

});
console.log("->", test.dir.slice(1, -1), "iterated");
console.log("->", test.dir.slice(1, -1), "started");
};

@@ -60,11 +52,4 @@

(function check(){
if(runCount !== 0){
return setTimeout(check, 50);
}
if(runCount !== 0) return process.nextTick(check);
console.log("Total tests:", testCount);
console.log("Failed tests:", failCount);
if(failCount !== 0){
throw Error("Encountered " + failCount + " errors!");
}
})();
//Runs tests for feeds
var helper = require("./test-helper.js"),
FeedHandler = require("../lib/FeedHandler.js");
FeedHandler = require("../lib/FeedHandler.js"),
fs = require("fs"),
parserOpts = {
xmlMode: true
};

@@ -12,4 +16,5 @@ exports.dir = "/Feeds/";

else cb(null, dom);
}, test.options.handler);
helper.writeToParser(handler, test.options.parser, test.html);
});
var file = fs.readFileSync(__dirname + "/Documents/" + test.file).toString();
helper.writeToParser(handler, parserOpts, file);
};
exports.name = "RSS (2.0)";
exports.options = {
handler: {},
parser: {
xmlMode: true
}
};
exports.type = "rss";
exports.html = require("fs").readFileSync(__dirname+"/../Documents/RSS_Example.xml").toString();
exports.file = "/RSS_Example.xml";
exports.expected = {

@@ -11,0 +4,0 @@ type: "rss",

exports.name = "Atom (1.0)";
exports.options = {
handler: {},
parser: {
xmlMode: true
}
};
exports.type = "rss";
exports.html = require("fs").readFileSync(__dirname+"/../Documents/Atom_Example.xml").toString();
exports.file = "/Atom_Example.xml";
exports.expected = {

@@ -11,0 +4,0 @@ type: "atom",

exports.name = "RDF test";
exports.options = {
handler: {},
parser: {
xmlMode: true
}
};
exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString();
exports.file = "/RDF_Example.xml";
exports.expected = {
"type": "rdf:RDF",
"type": "rdf",
"id": "",

@@ -14,0 +6,0 @@ "title": "craigslist | all community in SF bay area",

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc