Socket
Socket
Sign in · Demo · Install

htmlparser2

Package Overview
Dependencies
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

htmlparser2 - npm Package Compare versions

Comparing version 2.0.1 to 2.1.0

38

lib/DomUtils.js

@@ -100,3 +100,41 @@ var ElementType = require("./ElementType.js");

else return filter(function(elem){return elem.type === type;}, element, recurse, limit);
},
getInnerHTML: function(elem){
if(!elem.children) return "";
var childs = elem.children,
childNum = childs.length,
ret = "";
for(var i = 0; i < childNum; i++){
ret += this.getOuterHTML(childs[i]);
}
return ret;
},
getOuterHTML: function(elem){
var type = elem.type;
if(type === ElementType.Text) return elem.data;
if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
var ret = "<" + elem.name;
var value;
for(var name in elem.attribs){
value = elem.attribs[name];
ret += " " + name + "=";
if(/^[^\s"\'\`\=\<\>]+$/.test(value)) ret += value;
else if(value.indeOf("\"") !== -1) ret += "'" + value + "'";
else ret += "\"" + value + "\"";
}
if(type === ElementType.Directive) return ret + ">";
if(type === ElementType.Tag && !elem.children) return ret + " />";
return ">" + ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
}
};

174

lib/Parser.js

@@ -8,7 +8,6 @@ var ElementType = require("./ElementType.js");

this._buffer = "";
this._prevTagSep = "";
this._tagSep = "";
this._stack = [];
this._contentFlags = 0;
this._done = false;
this._parseState = ElementType.Text;
}

@@ -18,4 +17,4 @@

//Regular expressions used for cleaning up and parsing (stateless)
var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //matches tagnames
var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
var _reTagName = /[^\s\/]+/; //matches tagnames
var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;

@@ -42,4 +41,4 @@ Parser.prototype._options = {

this.reset();
this.parseChunk(data);
this.done();
this.write(data);
this.end();
};

@@ -56,3 +55,3 @@

//Tells the parser that the HTML being parsed is complete
Parser.prototype.done = function(){
Parser.prototype.end = Parser.prototype.done = function(chunk){
if(this._done) return;

@@ -62,2 +61,3 @@ this._done = true;

//Parse the buffer to its end
if(chunk) this._buffer += chunk;
if(this._buffer) this._parseTags(true);

@@ -81,4 +81,4 @@

var parseAttributes = function(data){
var pos = data.search(/\s/), attrs = {}; //Find any whitespace
if(pos === -1) return attrs;
var pos = data.search(/\w\s/) + 1, attrs = {}; //Find any whitespace
if(pos === 0) return attrs;
var attribRaw = data.substr(pos);

@@ -90,6 +90,4 @@

while(match = _reAttrib.exec(attribRaw)){
if(match[1]) attrs[match[1]] = match[2];
else if(match[3]) attrs[match[3]] = match[4];
else if(match[5]) attrs[match[5]] = match[6];
else if(match[7]) attrs[match[7]] = match[7];
if(match[1]) attrs[match[1]] = match[2] || match[3] || match[4];
else attrs[match[5]] = match[5];
}

@@ -105,5 +103,5 @@

if(this._options.lowerCaseTags){
return match[1] + match[2].toLowerCase();
return match[0].toLowerCase();
}
else return match[1] + match[2];
else return match[0];
};

@@ -123,3 +121,3 @@

var next, tagSep, rawData, elementName, elementType, elementData;
var next, rawData, elementType, elementData, lastTagSep;

@@ -132,82 +130,76 @@ var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");

while(opening !== closing){ //just false if both are -1
lastTagSep = this._tagSep;
if((opening !== -1 && opening < closing) || closing === -1){
next = opening;
tagSep = "<";
opening = buffer.indexOf(tagSep, next + 1);
this._tagSep = "<";
opening = buffer.indexOf("<", next + 1);
}
else{
next = closing;
tagSep = ">";
closing = buffer.indexOf(tagSep, next + 1);
this._tagSep = ">";
closing = buffer.indexOf(">", next + 1);
}
rawData = buffer.substring(current, next); //The next chunk of data to parse
elementType = this._parseState;
//set elements for next run
current = next + 1;
this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
if(elementType === ElementType.Tag){
elementData = rawData.trim();
elementName = this._parseTagName(elementData);
}
else{
elementData = rawData;
elementName = "";
}
//This section inspects the current tag stack and modifies the current
//element if we're actually parsing a special area (script/comment/style tag)
if(this._contentFlags === 0){ /*do nothing*/ }
else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
if(this._contentFlags >= SpecialTags[ElementType.Comment]){
//We're currently in a comment tag
this._processComment(rawData, tagSep);
this._processComment(rawData);
continue;
}
//if it's a closing tag, remove the flag
else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
//remove the script flag (also removes the written flag)
this._contentFlags %= SpecialTags[ElementType.Script];
}
else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "/style"){
//remove the style flag (also removes the written flag)
this._contentFlags %= SpecialTags[ElementType.Style];
}
//special behaviour for script & style tags
//Make sure we're not in a comment
else if(!this._options.xmlMode && rawData.substring(0, 3) !== "!--"){
//If the previous element is text, append the last tag sep to element
if(this._contentFlags >= SpecialTags.w){
if(this._cbs.ontext) this._cbs.ontext(this._prevTagSep + rawData);
if(lastTagSep === "<"){
elementData = rawData.trimLeft();
if(elementData.charAt(0) === "/"){
//elementData = elementData.substr(1).trim();
elementData = this._parseTagName(elementData.substr(1));
if(this._contentFlags !== 0){
//if it's a closing tag, remove the flag
if(this._contentFlags >= SpecialTags[ElementType.Script] && elementData === "script"){
//remove the script flag (also removes the written flag)
this._contentFlags %= SpecialTags[ElementType.Script];
}
else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementData === "style"){
//remove the style flag (also removes the written flag)
this._contentFlags %= SpecialTags[ElementType.Style];
}
else {
this._writeSpecial(rawData, lastTagSep);
continue;
}
}
this._processCloseTag(elementData);
}
else{ //The previous element was not text
this._contentFlags += SpecialTags.w;
if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
else if(elementData.charAt(0) === "!" || elementData.charAt(0) === "?"){
if(elementData.substr(0, 3) === "!--"){
//This tag is a comment
this._contentFlags += SpecialTags[ElementType.Comment];
this._processComment(rawData.substr(3));
}
else if(this._contentFlags !== 0){
this._writeSpecial(rawData, lastTagSep);
}
//This tag is a directive
//TODO: what about CDATA?
else if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
elementData.charAt(0) + this._parseTagName(elementData.substr(1)),
elementData
);
}
}
this._prevTagSep = tagSep;
continue;
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
else this._processOpenTag(this._parseTagName(elementData), elementData);
}
//Processing of non-special tags
if(elementType === ElementType.Tag){
if(rawData.substring(0, 3) === "!--"){ //This tag is a comment
this._contentFlags += SpecialTags[ElementType.Comment];
this._processComment(rawData.substr(3), tagSep);
continue;
else{
if(this._contentFlags !== 0){
this._writeSpecial(rawData, lastTagSep);
}
if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
//ElementType.Directive
//TODO: what about CDATA?
if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(elementName, elementData);
}
continue;
else if(rawData !== "" && this._cbs.ontext){
this._cbs.ontext(rawData);
}
if(elementName.charAt(0) === "/") this._processCloseTag(elementName.substr(1));
else this._processOpenTag(elementName, elementData, tagSep);
}
else if(elementType === ElementType.Text && rawData !== "" && this._cbs.ontext){
this._cbs.ontext(elementData);
}
}

@@ -218,6 +210,4 @@

Parser.prototype._processComment = function(rawData, tagSep){
this._prevTagSep = tagSep;
if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
Parser.prototype._processComment = function(rawData){
if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
//remove the written flag (also removes the comment flag)

@@ -228,13 +218,20 @@ this._contentFlags %= SpecialTags.w;

}
else if(this._cbs.oncomment) this._cbs.oncomment(rawData + tagSep);
else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
};
//Emits `rawData` through the `ontext` callback (when one is registered).
//If the written flag (SpecialTags.w) is already part of _contentFlags, the
//text is prefixed with `lastTagSep`; otherwise the flag is added first and
//an empty `rawData` is not reported at all.
Parser.prototype._writeSpecial = function(rawData, lastTagSep){
    var text = rawData;

    if(this._contentFlags >= SpecialTags.w){
        //text was written before: restore the separator that split it off
        text = lastTagSep + rawData;
    }
    else{
        //first piece of text: remember that something has been written
        this._contentFlags += SpecialTags.w;
        if(rawData === "") return;
    }

    if(this._cbs.ontext) this._cbs.ontext(text);
};
var emptyTags = require("./ClosingTags.js").self;
//Looks `name` up in the `emptyTags` table. In xmlMode the lookup is skipped
//and the result is always `false`.
Parser.prototype._isEmptyTag = function(name){
    if(this._options.xmlMode) return false;
    return emptyTags[name];
};
Parser.prototype._processCloseTag = function(name){
if(this._stack && !this._isEmptyTag(name)){
if(this._stack && (!emptyTags[name] || this._options.xmlMode)){
var i = this._stack.length;

@@ -254,3 +251,3 @@ while(i !== 0 && this._stack[--i] !== name){}

Parser.prototype._processOpenTag = function(name, data, tagSep){
Parser.prototype._processOpenTag = function(name, data){
var type = ElementType.Tag;

@@ -266,3 +263,3 @@ if(this._options.xmlMode){ /*do nothing*/ }

//If tag self-terminates, add an explicit, separate closing tag
if(data.substr(-1) === "/" || this._isEmptyTag(name)){
if(data.substr(-1) === "/" || (emptyTags[name] && !this._options.xmlMode)){
if(this._cbs.onclosetag) this._cbs.onclosetag(name);

@@ -272,3 +269,2 @@ } else {

this._stack.push(name);
this._prevTagSep = tagSep;
}

@@ -275,0 +271,0 @@ };

{
"name": "htmlparser2",
"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
"version": "2.0.1",
"version": "2.1.0",
"author": "Felix Boehm <me@feedic.com>",

@@ -6,0 +6,0 @@ "contributors": ["Chris Winberry <chris@winberry.net>"],

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc