Socket
Socket
Sign in · Demo · Install

htmlparser2

Package Overview
Dependencies
0
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 2.1.2 to 2.1.3

tests/Events/03-lowercase_tags.js

9

lib/DefaultHandler.js

@@ -30,5 +30,6 @@ var ElementType = require("./ElementType.js");

DefaultHandler.prototype._handleCallback =
DefaultHandler.prototype.onerror = function(error){
if(typeof this._callback === "function"){
return this._callback(error, this.dom);
this._callback(error, this.dom);
} else {

@@ -39,6 +40,4 @@ if(error) throw error;

DefaultHandler.prototype._handleCallback = DefaultHandler.prototype.onerror;
DefaultHandler.prototype.onclosetag = function(name){
this._tagStack.pop();
if(this._tagStack.pop().name !== name) this._handleCallback(Error("tagname didn't match!"));
};

@@ -102,3 +101,3 @@

data: data,
type: ElementType.Comment
type: ElementType.Comment
};

@@ -105,0 +104,0 @@

@@ -29,3 +29,3 @@ var ElementType = require("./ElementType.js");

if(recurse !== false) recurse = true;
if(isNaN(limit)) limit = Infinity;
if(isNaN(limit)) limit = 1/0;
if(!Array.isArray(element)){

@@ -41,14 +41,14 @@ element = [element];

for(var key in options){
if(key === "tag_name"){
if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
if(!options.tag_name(element.name)) return false;
} else if(key === "tag_type") {
if(!options.tag_type(type)) return false;
} else if(key === "tag_contains") {
if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
if(!options.tag_contains(element.data)) return false;
} else if(!element.attribs || !options[key](element.attribs[key]))
return false;
}
for(var key in options){
if(key === "tag_name"){
if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
if(!options.tag_name(element.name)) return false;
} else if(key === "tag_type") {
if(!options.tag_type(type)) return false;
} else if(key === "tag_contains") {
if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
if(!options.tag_contains(element.data)) return false;
} else if(!element.attribs || !options[key](element.attribs[key]))
return false;
}

@@ -90,3 +90,3 @@ return true;

else return filter(function(elem){
return filter(function(elem){
var type = elem.type;

@@ -142,2 +142,2 @@ if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;

}
};
};

@@ -8,3 +8,4 @@ //Types of elements found in the DOM

Style: "style", //Special tag <style>...</style>
Tag: "tag" //Any tag that isn't special
Tag: "tag", //Any tag that isn't special
CDATA: "cdata"
};

@@ -16,3 +16,3 @@ module.exports = {

Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")});
return ElementType;
return this.ElementType;
},

@@ -19,0 +19,0 @@ get DomUtils(){

@@ -16,4 +16,4 @@ var ElementType = require("./ElementType.js");

//Regular expressions used for cleaning up and parsing (stateless)
var _reTagName = /[^\s\/]+/; //matches tag names
var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;
var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
_reTail = /\s|\/|$/;

@@ -27,7 +27,12 @@ Parser.prototype._options = {

/*
onopentag,
onclosetag,
ontext,
onprocessinginstruction,
oncomment
oncdataend,
oncdatastart,
onclosetag,
oncomment,
oncommentend,
onerror,
onopentag,
onprocessinginstruction,
onreset,
ontext
*/

@@ -76,12 +81,6 @@ };

var parseAttributes = function(data){
var pos = data.search(/\w\s/) + 1, attrs = {}; //Find any whitespace
if(pos === 0) return attrs;
var attribRaw = data.substr(pos);
_reAttrib.lastIndex = 0;
var match;
while(match = _reAttrib.exec(attribRaw)){
if(match[1]) attrs[match[1]] = match[2] || match[3] || match[4];
else attrs[match[5]] = match[5];
var attrs = {}, match;
while(match = _reAttrib.exec(data)){
attrs[match[1]] = match[2] || match[3] || match[4] || match[1];
}

@@ -94,8 +93,5 @@

Parser.prototype._parseTagName = function(data){
var match = data.match(_reTagName);
if(match === null) return "";
if(this._options.lowerCaseTags){
return match[0].toLowerCase();
}
else return match[0];
var match = data.substr(0, data.search(_reTail));
if(!this._options.lowerCaseTags) return match;
return match.toLowerCase();
};

@@ -108,3 +104,4 @@

SpecialTags[ElementType.Script] = 2; //2^1
SpecialTags[ElementType.Comment] = 4; //2^3
SpecialTags[ElementType.Comment] = 4; //2^2
SpecialTags[ElementType.CDATA] = 8; //2^3

@@ -115,3 +112,3 @@ //Parses through HTML text and returns an array of found elements

var next, rawData, elementType, elementData, lastTagSep;
var next, rawData, elementData, lastTagSep;

@@ -121,3 +118,3 @@ var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");

//if force is true, parse everything
if(force) opening = Infinity;
if(force) opening = 1/0;

@@ -142,9 +139,17 @@ while(opening !== closing){ //just false if both are -1

if(this._contentFlags >= SpecialTags[ElementType.Comment]){
if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
if(this._tagSep === ">" && rawData.substr(-2) === "]]"){
if(rawData.length !== 2 && this._cbs.ontext){
this._cbs.ontext(rawData.slice(0,-2));
}
this._contentFlags -= SpecialTags[ElementType.CDATA];
if(this._cbs.oncdataend) this._cbs.oncdataend();
}
else if(this._cbs.ontext) this._cbs.ontext(rawData + this._tagSep);
}
else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
//We're currently in a comment tag
this._processComment(rawData);
continue;
}
if(lastTagSep === "<"){
else if(lastTagSep === "<"){
elementData = rawData.trimLeft();

@@ -171,4 +176,4 @@ if(elementData.charAt(0) === "/"){

}
else if(elementData.charAt(0) === "!" || elementData.charAt(0) === "?"){
if(elementData.substr(0, 3) === "!--"){
else if(elementData.charAt(0) === "!"){
if(elementData.substr(1, 2) === "--"){
//This tag is a comment

@@ -178,8 +183,18 @@ this._contentFlags += SpecialTags[ElementType.Comment];

}
else if(elementData.substr(1, 7) === "[CDATA["){
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
if(this._tagSep === ">" && elementData.substr(-2) === "]]"){
if(this._cbs.oncdataend) this._cbs.oncdataend();
if(this._cbs.ontext) this._cbs.ontext(elementData.slice(8, -2));
}
else{
if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8));
this._contentFlags += SpecialTags[ElementType.CDATA];
}
}
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
//This tag is a directive
//TODO: what about CDATA?
else if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
elementData.charAt(0) + this._parseTagName(elementData.substr(1)),
"!" + this._parseTagName(elementData.substr(1)),
elementData

@@ -190,2 +205,10 @@ );

else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
else if(elementData.charAt(0) === "?"){
if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
"?" + this._parseTagName(elementData.substr(1)),
elementData
);
}
}
else this._processOpenTag(this._parseTagName(elementData), elementData);

@@ -247,10 +270,9 @@ }

if(this._stack && (!emptyTags[name] || this._options.xmlMode)){
var i = this._stack.length;
while(i !== 0 && this._stack[--i] !== name){}
if(i !== 0 || this._stack[0] === name)
var pos = this._stack.lastIndexOf(name);
if(pos !== -1)
if(this._cbs.onclosetag){
while(i < this._stack.length)
this._cbs.onclosetag(this._stack.pop());
pos = this._stack.length - pos;
while(pos--) this._cbs.onclosetag(this._stack.pop());
}
else this._stack.splice(i);
else this._stack.splice(pos);
}

@@ -257,0 +279,0 @@ //many browsers (eg. Safari, Chrome) convert </br> to <br>

{
"name": "htmlparser2",
"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
"version": "2.1.2",
"version": "2.1.3",
"author": "Felix Boehm <me@feedic.com>",
"keywords": ["html", "parser", "xml", "dom", "rss", "feed", "atom"],
"contributors": ["Chris Winberry <chris@winberry.net>"],

@@ -20,3 +21,3 @@ "repository": {

"scripts": {
"test": "cd tests && node 00-runtests.js"
"test": "node --harmony_proxies tests/00-runtests.js"
},

@@ -23,0 +24,0 @@ "engines": "node >= 0.3.0",

@@ -10,6 +10,6 @@ var fs = require("fs");

//read files, load them, run them
fs.readdirSync(test.dir
fs.readdirSync(__dirname + test.dir
).map(function(file){
if(file[0] === ".") return false;
return require(test.dir + file);
return require(__dirname + test.dir + file);
}).forEach(function(file){

@@ -19,3 +19,3 @@ if(file === false) return;

failed = false,
start = Date.now()
start = Date.now(),
took = 0;

@@ -22,0 +22,0 @@

@@ -6,3 +6,3 @@ //Runs tests for HTML

exports.dir = "./HTML/";
exports.dir = "/HTML/";

@@ -9,0 +9,0 @@ /*

@@ -6,3 +6,3 @@ //Runs tests for feeds

exports.dir = "./Feeds/";
exports.dir = "/Feeds/";

@@ -15,2 +15,2 @@ exports.test = function(test, cb){

helper.writeToParser(handler, test.options.parser, test.html);
}
};

@@ -1,29 +0,41 @@

var helper = require("./test-helper.js");
var helper = require("./test-helper.js"),
sliceArr = Array.prototype.slice;
exports.dir = "./Events/";
exports.dir = "/Events/";
exports.test = function(test, cb){
var tokens = [];
var cbs = {
onopentag: function(name, attributes){
tokens.push({event:"open", name: name, attributes: attributes});
},
onclosetag: function(name){
tokens.push({event:"close", name: name});
},
ontext: function(text){
tokens.push({event:"text", text: text});
},
oncomment: function(data){
tokens.push({event:"comment", data:data});
},
onprocessinginstruction: function(name, data){
tokens.push({event:"processing", name:name, data:data});
},
onend: function(){
//deletes all tokens
cb(null, tokens.splice(0));
}
};
var tokens = [], cbs;
if(typeof Proxy !== "undefined"){
cbs = Proxy.create({ get: function(a, name){
if(name === "onend"){
return function(){
cb(null, tokens.splice(0));
}
}
if(name === "onreset") return function(){};
return function(){
tokens.push({
event: name.substr(2),
data: sliceArr.apply(arguments)
});
}
}});
}
else{
cbs = {
onerror: cb,
onend: function(){
cb(null, tokens.splice(0));
}
};
helper.EVENTS.forEach(function(name){
cbs["on" + name] = function(){
tokens.push({
event: name,
data: sliceArr.apply(arguments)
});
}
});
}
helper.writeToParser(cbs, test.options.parser, test.html);
}
};

@@ -1,3 +0,1 @@

var DomUtils = require("../lib/DomUtils.js");
//generate a dom

@@ -12,3 +10,3 @@ var handler = new (require("../lib/DefaultHandler.js"))();

exports.dir = "./DomUtils/";
exports.dir = "/DomUtils/";

@@ -15,0 +13,0 @@ exports.test = function(test, cb){

@@ -1,7 +0,26 @@

var xml = Array(5e3).join("<!directive><tag attr='value'> text <!--Comment<>--></tag>"),
parser = new (require("../lib/Parser.js"))({}),
var multiply = function(text){
return Array(5e3+1).join(text);
},
tests = {
self_closing: multiply("<br/>"),
tag: multiply("<tag foo=bar foobar> Text </tag>"),
comment: multiply("<!-- this is <<a> comment -->"),
directive: multiply("<?foo bar?>"),
special: multiply("<script> THIS IS <SPECIAL> </script>"),
xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
}
empty = function(){},
cbs = {};
require("./test-helper.js").EVENTS.forEach(function(name){
cbs["on" + name] = empty;
});
var parser = new (require("../lib/Parser.js"))(cbs),
ben = require("ben");
console.log("Test took (ms)", ben(1e2, function(){
parser.parseComplete(xml);
}));
Object.keys(tests).forEach(function(name){
console.log("Test", name, "took", ben(150, function(){
parser.parseComplete(tests[name]);
}));
});
exports.name = "simple";
exports.options = {handler: {}, parser: {}};
exports.html = "<h1 class=test>adsf</h1>";
exports.expected = [ { event: 'open',
name: 'h1',
attributes: { class: 'test' } },
{ event: 'text', text: 'adsf' },
{ event: 'close', name: 'h1' } ];
exports.expected = [
{
"event": "opentag",
"data": [
"h1",
{
"class": "test"
},
"tag"
]
},
{
"event": "text",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
"h1"
]
}
];

@@ -6,28 +6,41 @@ exports.name = "Template script tags";

{
"event": "open",
"name": "script",
"attributes": {
"type": "text/template"
}
"event": "opentag",
"data": [
"script",
{
"type": "text/template"
},
"script"
]
},
{
"event": "text",
"text": "<h1"
"data": [
"<h1"
]
},
{
"event": "text",
"text": ">Heading1"
"data": [
">Heading1"
]
},
{
"event": "text",
"text": "</h1"
"data": [
"</h1"
]
},
{
"event": "text",
"text": ">"
"data": [
">"
]
},
{
"event": "close",
"name": "script"
"event": "closetag",
"data": [
"script"
]
}
];
exports.name = "RDF test";
exports.type = "rss";
exports.options = {

@@ -13,15 +12,18 @@ handler: {},

exports.expected = {
type: 'rdf:RDF',
id: '',
title: 'craigslist | all community in SF bay area',
link: 'http://sfbay.craigslist.org/ccc/',
items: [{
title: '![CDATA[ Music Equipment Repair and Consignment ]]',
link: '\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n',
description: '![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065'
}, {
title: '![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]',
link: '\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n',
description: '![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.'
}]
"type": "rdf:RDF",
"id": "",
"title": "craigslist | all community in SF bay area",
"link": "http://sfbay.craigslist.org/ccc/",
"items": [
{
"title": " Music Equipment Repair and Consignment ",
"link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n",
"description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065"
},
{
"title": "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n",
"link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n",
"description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101."
}
]
};

@@ -13,2 +13,4 @@ var Parser = require("../lib/Parser.js"),

parser.parseComplete(data);
}
}
exports.EVENTS = ["cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag"/*, "error", "end"*/];
Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc