Socket
Socket
Sign in | Demo | Install

node-html-parser

Package Overview
Dependencies
Maintainers
1
Versions
119
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-html-parser - npm Package Compare versions

Comparing version 4.1.5 to 5.0.0

CHANGELOG.md

235

dist/main.js

@@ -37,3 +37,3 @@ var __extends = (this && this.__extends) || (function () {

}
return to.concat(ar || from);
return to.concat(ar || Array.prototype.slice.call(from));
};

@@ -297,2 +297,3 @@ define("back", ["require", "exports"], function (require, exports) {

comment_1 = __importDefault(comment_1);
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
function decode(val) {

@@ -442,3 +443,3 @@ // clone string

HTMLElement.prototype.quoteAttribute = function (attr) {
if (attr === null) {
if (attr == null) {
return 'null';

@@ -497,2 +498,9 @@ }

});
// Accessor property `isVoidElement` on every HTMLElement instance.
// Only a getter is defined (no setter), so the value is always derived from
// the element's current localName.
// NOTE(review): voidTags is built from lower-case names, so this lookup
// presumably relies on localName being lower-cased — confirm against the
// localName getter.
Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
get: function () {
// true when the element's local name is one of the entries in voidTags
return voidTags.has(this.localName);
},
// hidden from for...in / Object.keys enumeration of the prototype
enumerable: false,
// may still be redefined or deleted later
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "rawText", {

@@ -585,10 +593,4 @@ /**

if (tag) {
// const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
// const is_void = void_tags.has(tag);
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
if (is_void) {
return "<" + tag + attrs + ">";
}
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
}

@@ -743,59 +745,2 @@ return this.innerHTML;

});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// if (selector.includes(',')) {
// const selectors = selector.split(',');
// return Array.from(selectors.reduce((pre, cur) => {
// const result = this.querySelectorAll(cur.trim());
// return result.reduce((p, c) => {
// return p.add(c);
// }, pre);
// }, new Set<HTMLElement>()));
// }
// matcher = new Matcher(selector);
// }
// interface IStack {
// 0: Node; // node
// 1: number; // children
// 2: boolean; // found flag
// }
// const stack = [] as IStack[];
// return this.childNodes.reduce((res, cur) => {
// stack.push([cur, 0, false]);
// while (stack.length) {
// const state = arr_back(stack); // get last element
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// const html_el = el as HTMLElement;
// state[2] = matcher.advance(html_el);
// if (state[2]) {
// if (matcher.matched) {
// res.push(html_el);
// res.push(...(html_el.querySelectorAll(selector)));
// // no need to go further.
// matcher.rewind();
// stack.pop();
// continue;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// return res;
// }, [] as HTMLElement[]);
};

@@ -805,3 +750,3 @@ /**

* @param {string} selector Simplified CSS selector
* @return {HTMLElement} matching node
* @return {(HTMLElement|null)} matching node
*/

@@ -813,41 +758,42 @@ HTMLElement.prototype.querySelector = function (selector) {

});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// matcher = new Matcher(selector);
// }
// const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
// for (const node of this.childNodes) {
// stack.push([node, 0, false]);
// while (stack.length) {
// const state = arr_back(stack);
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// state[2] = matcher.advance(el as HTMLElement);
// if (state[2]) {
// if (matcher.matched) {
// return el as HTMLElement;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// }
// return null;
};
/**
* Find descendant elements by their tagName.
*
* Walks the subtree below this element iteratively (no recursion), visiting
* each element before its children, so matches are returned in that order.
* The element the method is called on is never tested itself; only nodes
* reachable through childNodes are. Non-element nodes are skipped and not
* descended into.
*
* @param {string} tagName the tagName of the elements to select; it is upper-cased
*                         before comparison, and '*' selects every element
* @return {HTMLElement[]} the matching elements (empty array when none match)
*/
HTMLElement.prototype.getElementsByTagName = function (tagName) {
// NOTE(review): the comparison below assumes child.tagName is already upper-case — confirm against the tagName getter.
var upperCasedTagName = tagName.toUpperCase();
// accumulated matches
var re = [];
// for every level we descended from: the index at which to resume in that parent's childNodes
var stack = [];
// node whose childNodes are currently being scanned
var currentNodeReference = this;
// position of the next child to read in currentNodeReference.childNodes
var index = 0;
// index turns to undefined once the stack is empty and the first condition occurs
// which happens once all relevant children are searched through
// (stack.pop() on an empty array yields undefined, which ends the loop)
while (index !== undefined) {
var child = void 0;
// make it work with sparse arrays: skip over holes until a child is found or the end is reached
do {
child = currentNodeReference.childNodes[index++];
} while (index < currentNodeReference.childNodes.length && child === undefined);
// if the child does not exist we move on with the last provided index (which belongs to the parentNode)
if (child === undefined) {
// this level is exhausted: climb back to the parent and resume where we left off
currentNodeReference = currentNodeReference.parentNode;
index = stack.pop();
continue;
}
if (child.nodeType === type_3.default.ELEMENT_NODE) {
// https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
// '*' is compared against the caller's original string, not the upper-cased copy
if (tagName === '*' || child.tagName === upperCasedTagName)
re.push(child);
// if children are existing push the current status to the stack and keep searching for elements in the level below
if (child.childNodes.length > 0) {
// index already points past this child, so it is the correct resume position
stack.push(index);
currentNodeReference = child;
index = 0;
}
}
}
return re;
};
/**
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.

@@ -970,3 +916,3 @@ * @param selector a DOMString containing a selector list

/**
* Get escaped (as-it) attributes
* Get escaped (as-is) attributes
* @return {Object} parsed attributes

@@ -980,6 +926,10 @@ */

if (this.rawAttrs) {
var re = /([a-z()#][a-z0-9-_:()#]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/gi;
var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
var match = void 0;
while ((match = re.exec(this.rawAttrs))) {
attrs[match[1]] = match[2] || match[3] || match[4] || null;
var key = match[1];
var val = match[2] || null;
if (val && (val[0] === "'" || val[0] === "\""))
val = val.slice(1, val.length - 1);
attrs[key] = val;
}

@@ -1180,8 +1130,4 @@ }

// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*((?=[/>]*?)|(?:.*?[\s\d/'"])|(?:.*?[\w]))(\/?)>/gi;
// <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
// <([a-z][-.:0-9_a-z]*)\s*\/>
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/gi;
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
var kSelfClosingElements = {

@@ -1273,4 +1219,4 @@ area: true,

var element_names = Object.keys(elements);
var kBlockTextElements = element_names.map(function (it) { return new RegExp(it, 'i'); });
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp(it, 'i'); });
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
function element_should_be_ignore(tag) {

@@ -1291,10 +1237,15 @@ return kIgnoreElements.some(function (it) { return it.test(tag); });

data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
var lowerCaseTagName = options.lowerCaseTagName;
var dataEndPos = data.length - (frameflag.length + 2);
var frameFlagOffset = frameflag.length + 2;
while ((match = kMarkupPattern.exec(data))) {
var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
// Note: Object destructuring here consistently tests as higher performance than array destructuring
// eslint-disable-next-line prefer-const
var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
var matchLength = matchText.length;
var tagStartPos = kMarkupPattern.lastIndex - matchLength;
var tagEndPos = kMarkupPattern.lastIndex;
// Add TextNode if content
if (lastTextPos > -1) {
if (lastTextPos + match[0].length < tagEndPos) {
if (lastTextPos + matchLength < tagEndPos) {
var text = data.substring(lastTextPos, tagStartPos);

@@ -1307,6 +1258,6 @@ currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));

// Skip frameflag node
if (match[2] === frameflag)
if (tagName === frameflag)
continue;
// Handle comments
if (match[0][1] === '!') {
if (matchText[1] === '!') {
if (options.comment) {

@@ -1321,14 +1272,16 @@ // Only keep what is in between <!-- and -->

// Fix tag casing if necessary
if (options.lowerCaseTagName)
match[2] = match[2].toLowerCase();
if (lowerCaseTagName)
tagName = tagName.toLowerCase();
// Handle opening tags (ie. <this> not </that>)
if (!match[1]) {
if (!leadingSlash) {
/* Populate attributes */
var attrs = {};
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
var key = attMatch[1], val = attMatch[2];
var isQuoted = val[0] === "'" || val[0] === "\"";
attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
}
var tagName = currentParent.rawTagName;
if (!match[4] && kElementsClosedByOpening[tagName]) {
if (kElementsClosedByOpening[tagName][match[2]]) {
var parentTagName = currentParent.rawTagName;
if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
if (kElementsClosedByOpening[parentTagName][tagName]) {
stack.pop();

@@ -1339,3 +1292,3 @@ currentParent = (0, back_1.default)(stack);

// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
if (match[2] === 'a' || match[2] === 'A') {
if (tagName === 'a' || tagName === 'A') {
if (noNestedTagIndex !== undefined) {

@@ -1348,15 +1301,15 @@ stack.splice(noNestedTagIndex);

var tagEndPos_1 = kMarkupPattern.lastIndex;
var tagStartPos_1 = tagEndPos_1 - match[0].length;
var tagStartPos_1 = tagEndPos_1 - matchLength;
currentParent = currentParent.appendChild(
// Initialize range (end position updated later for closed tags)
new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
stack.push(currentParent);
if (is_block_text_element(match[2])) {
if (is_block_text_element(tagName)) {
// Find closing tag
var closeMarkup = "</" + match[2] + ">";
var closeIndex = options.lowerCaseTagName
var closeMarkup = "</" + tagName + ">";
var closeIndex = lowerCaseTagName
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
if (element_should_be_ignore(match[2])) {
if (element_should_be_ignore(tagName)) {
var text = data.substring(tagEndPos_1, textEndPos);

@@ -1373,3 +1326,3 @@ if (text.length > 0 && /\S/.test(text)) {

// Cause to be treated as self-closing, because no close found
match[1] = 'true';
leadingSlash = '/';
}

@@ -1379,7 +1332,7 @@ }

// Handle closing tags or self-closed elements (ie </tag> or <br>)
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
while (true) {
if (match[2] === 'a' || match[2] === 'A')
if (tagName === 'a' || tagName === 'A')
noNestedTagIndex = undefined;
if (currentParent.rawTagName === match[2]) {
if (currentParent.rawTagName === tagName) {
// Update range end for closed tag

@@ -1392,6 +1345,6 @@ currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];

else {
var tagName = currentParent.tagName;
var parentTagName = currentParent.tagName;
// Trying to close current tag, and move on
if (kElementsClosedByClosing[tagName]) {
if (kElementsClosedByClosing[tagName][match[2]]) {
if (kElementsClosedByClosing[parentTagName]) {
if (kElementsClosedByClosing[parentTagName][tagName]) {
stack.pop();

@@ -1398,0 +1351,0 @@ currentParent = (0, back_1.default)(stack);

12

dist/nodes/html.d.ts

@@ -80,2 +80,3 @@ import Node from './node';

get localName(): string;
get isVoidElement(): boolean;
/**

@@ -129,6 +130,11 @@ * Get escpaed (as-it) text value of current node and its children.

* @param {string} selector Simplified CSS selector
* @return {HTMLElement} matching node
* @return {(HTMLElement|null)} matching node
*/
querySelector(selector: string): HTMLElement;
querySelector(selector: string): HTMLElement | null;
/**
* find elements by their tagName
* @param {string} tagName the tagName of the elements to select
*/
getElementsByTagName(tagName: string): Array<HTMLElement>;
/**
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.

@@ -162,3 +168,3 @@ * @param selector a DOMString containing a selector list

/**
* Get escaped (as-it) attributes
* Get escaped (as-is) attributes
* @return {Object} parsed attributes

@@ -165,0 +171,0 @@ */

@@ -35,3 +35,3 @@ "use strict";

}
return to.concat(ar || from);
return to.concat(ar || Array.prototype.slice.call(from));
};

@@ -51,2 +51,3 @@ var __importDefault = (this && this.__importDefault) || function (mod) {

var comment_1 = __importDefault(require("./comment"));
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
function decode(val) {

@@ -196,3 +197,3 @@ // clone string

HTMLElement.prototype.quoteAttribute = function (attr) {
if (attr === null) {
if (attr == null) {
return 'null';

@@ -251,2 +252,9 @@ }

});
// Accessor property `isVoidElement` on every HTMLElement instance.
// Only a getter is defined (no setter), so the value is always derived from
// the element's current localName.
// NOTE(review): voidTags is built from lower-case names, so this lookup
// presumably relies on localName being lower-cased — confirm against the
// localName getter.
Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
get: function () {
// true when the element's local name is one of the entries in voidTags
return voidTags.has(this.localName);
},
// hidden from for...in / Object.keys enumeration of the prototype
enumerable: false,
// may still be redefined or deleted later
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "rawText", {

@@ -339,10 +347,4 @@ /**

if (tag) {
// const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
// const is_void = void_tags.has(tag);
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
if (is_void) {
return "<" + tag + attrs + ">";
}
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
}

@@ -497,59 +499,2 @@ return this.innerHTML;

});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// if (selector.includes(',')) {
// const selectors = selector.split(',');
// return Array.from(selectors.reduce((pre, cur) => {
// const result = this.querySelectorAll(cur.trim());
// return result.reduce((p, c) => {
// return p.add(c);
// }, pre);
// }, new Set<HTMLElement>()));
// }
// matcher = new Matcher(selector);
// }
// interface IStack {
// 0: Node; // node
// 1: number; // children
// 2: boolean; // found flag
// }
// const stack = [] as IStack[];
// return this.childNodes.reduce((res, cur) => {
// stack.push([cur, 0, false]);
// while (stack.length) {
// const state = arr_back(stack); // get last element
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// const html_el = el as HTMLElement;
// state[2] = matcher.advance(html_el);
// if (state[2]) {
// if (matcher.matched) {
// res.push(html_el);
// res.push(...(html_el.querySelectorAll(selector)));
// // no need to go further.
// matcher.rewind();
// stack.pop();
// continue;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// return res;
// }, [] as HTMLElement[]);
};

@@ -559,3 +504,3 @@ /**

* @param {string} selector Simplified CSS selector
* @return {HTMLElement} matching node
* @return {(HTMLElement|null)} matching node
*/

@@ -567,41 +512,42 @@ HTMLElement.prototype.querySelector = function (selector) {

});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// matcher = new Matcher(selector);
// }
// const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
// for (const node of this.childNodes) {
// stack.push([node, 0, false]);
// while (stack.length) {
// const state = arr_back(stack);
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// state[2] = matcher.advance(el as HTMLElement);
// if (state[2]) {
// if (matcher.matched) {
// return el as HTMLElement;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// }
// return null;
};
/**
* Find descendant elements by their tagName.
*
* Walks the subtree below this element iteratively (no recursion), visiting
* each element before its children, so matches are returned in that order.
* The element the method is called on is never tested itself; only nodes
* reachable through childNodes are. Non-element nodes are skipped and not
* descended into.
*
* @param {string} tagName the tagName of the elements to select; it is upper-cased
*                         before comparison, and '*' selects every element
* @return {HTMLElement[]} the matching elements (empty array when none match)
*/
HTMLElement.prototype.getElementsByTagName = function (tagName) {
// NOTE(review): the comparison below assumes child.tagName is already upper-case — confirm against the tagName getter.
var upperCasedTagName = tagName.toUpperCase();
// accumulated matches
var re = [];
// for every level we descended from: the index at which to resume in that parent's childNodes
var stack = [];
// node whose childNodes are currently being scanned
var currentNodeReference = this;
// position of the next child to read in currentNodeReference.childNodes
var index = 0;
// index turns to undefined once the stack is empty and the first condition occurs
// which happens once all relevant children are searched through
// (stack.pop() on an empty array yields undefined, which ends the loop)
while (index !== undefined) {
var child = void 0;
// make it work with sparse arrays: skip over holes until a child is found or the end is reached
do {
child = currentNodeReference.childNodes[index++];
} while (index < currentNodeReference.childNodes.length && child === undefined);
// if the child does not exist we move on with the last provided index (which belongs to the parentNode)
if (child === undefined) {
// this level is exhausted: climb back to the parent and resume where we left off
currentNodeReference = currentNodeReference.parentNode;
index = stack.pop();
continue;
}
if (child.nodeType === type_1.default.ELEMENT_NODE) {
// https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
// '*' is compared against the caller's original string, not the upper-cased copy
if (tagName === '*' || child.tagName === upperCasedTagName)
re.push(child);
// if children are existing push the current status to the stack and keep searching for elements in the level below
if (child.childNodes.length > 0) {
// index already points past this child, so it is the correct resume position
stack.push(index);
currentNodeReference = child;
index = 0;
}
}
}
return re;
};
/**
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.

@@ -724,3 +670,3 @@ * @param selector a DOMString containing a selector list

/**
* Get escaped (as-it) attributes
* Get escaped (as-is) attributes
* @return {Object} parsed attributes

@@ -734,6 +680,10 @@ */

if (this.rawAttrs) {
var re = /([a-z()#][a-z0-9-_:()#]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/gi;
var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
var match = void 0;
while ((match = re.exec(this.rawAttrs))) {
attrs[match[1]] = match[2] || match[3] || match[4] || null;
var key = match[1];
var val = match[2] || null;
if (val && (val[0] === "'" || val[0] === "\""))
val = val.slice(1, val.length - 1);
attrs[key] = val;
}

@@ -934,8 +884,4 @@ }

// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*((?=[/>]*?)|(?:.*?[\s\d/'"])|(?:.*?[\w]))(\/?)>/gi;
// <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
// <([a-z][-.:0-9_a-z]*)\s*\/>
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/gi;
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
var kSelfClosingElements = {

@@ -1027,4 +973,4 @@ area: true,

var element_names = Object.keys(elements);
var kBlockTextElements = element_names.map(function (it) { return new RegExp(it, 'i'); });
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp(it, 'i'); });
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
function element_should_be_ignore(tag) {

@@ -1045,10 +991,15 @@ return kIgnoreElements.some(function (it) { return it.test(tag); });

data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
var lowerCaseTagName = options.lowerCaseTagName;
var dataEndPos = data.length - (frameflag.length + 2);
var frameFlagOffset = frameflag.length + 2;
while ((match = kMarkupPattern.exec(data))) {
var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
// Note: Object destructuring here consistently tests as higher performance than array destructuring
// eslint-disable-next-line prefer-const
var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
var matchLength = matchText.length;
var tagStartPos = kMarkupPattern.lastIndex - matchLength;
var tagEndPos = kMarkupPattern.lastIndex;
// Add TextNode if content
if (lastTextPos > -1) {
if (lastTextPos + match[0].length < tagEndPos) {
if (lastTextPos + matchLength < tagEndPos) {
var text = data.substring(lastTextPos, tagStartPos);

@@ -1061,6 +1012,6 @@ currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));

// Skip frameflag node
if (match[2] === frameflag)
if (tagName === frameflag)
continue;
// Handle comments
if (match[0][1] === '!') {
if (matchText[1] === '!') {
if (options.comment) {

@@ -1075,14 +1026,16 @@ // Only keep what is in between <!-- and -->

// Fix tag casing if necessary
if (options.lowerCaseTagName)
match[2] = match[2].toLowerCase();
if (lowerCaseTagName)
tagName = tagName.toLowerCase();
// Handle opening tags (ie. <this> not </that>)
if (!match[1]) {
if (!leadingSlash) {
/* Populate attributes */
var attrs = {};
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
var key = attMatch[1], val = attMatch[2];
var isQuoted = val[0] === "'" || val[0] === "\"";
attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
}
var tagName = currentParent.rawTagName;
if (!match[4] && kElementsClosedByOpening[tagName]) {
if (kElementsClosedByOpening[tagName][match[2]]) {
var parentTagName = currentParent.rawTagName;
if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
if (kElementsClosedByOpening[parentTagName][tagName]) {
stack.pop();

@@ -1093,3 +1046,3 @@ currentParent = (0, back_1.default)(stack);

// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
if (match[2] === 'a' || match[2] === 'A') {
if (tagName === 'a' || tagName === 'A') {
if (noNestedTagIndex !== undefined) {

@@ -1102,15 +1055,15 @@ stack.splice(noNestedTagIndex);

var tagEndPos_1 = kMarkupPattern.lastIndex;
var tagStartPos_1 = tagEndPos_1 - match[0].length;
var tagStartPos_1 = tagEndPos_1 - matchLength;
currentParent = currentParent.appendChild(
// Initialize range (end position updated later for closed tags)
new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
stack.push(currentParent);
if (is_block_text_element(match[2])) {
if (is_block_text_element(tagName)) {
// Find closing tag
var closeMarkup = "</" + match[2] + ">";
var closeIndex = options.lowerCaseTagName
var closeMarkup = "</" + tagName + ">";
var closeIndex = lowerCaseTagName
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
if (element_should_be_ignore(match[2])) {
if (element_should_be_ignore(tagName)) {
var text = data.substring(tagEndPos_1, textEndPos);

@@ -1127,3 +1080,3 @@ if (text.length > 0 && /\S/.test(text)) {

// Cause to be treated as self-closing, because no close found
match[1] = 'true';
leadingSlash = '/';
}

@@ -1133,7 +1086,7 @@ }

// Handle closing tags or self-closed elements (ie </tag> or <br>)
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
while (true) {
if (match[2] === 'a' || match[2] === 'A')
if (tagName === 'a' || tagName === 'A')
noNestedTagIndex = undefined;
if (currentParent.rawTagName === match[2]) {
if (currentParent.rawTagName === tagName) {
// Update range end for closed tag

@@ -1146,6 +1099,6 @@ currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];

else {
var tagName = currentParent.tagName;
var parentTagName = currentParent.tagName;
// Trying to close current tag, and move on
if (kElementsClosedByClosing[tagName]) {
if (kElementsClosedByClosing[tagName][match[2]]) {
if (kElementsClosedByClosing[parentTagName]) {
if (kElementsClosedByClosing[parentTagName][tagName]) {
stack.pop();

@@ -1152,0 +1105,0 @@ currentParent = (0, back_1.default)(stack);

{
"name": "node-html-parser",
"version": "4.1.5",
"version": "5.0.0",
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
"main": "dist/index.js",
"module": "dist/esm/index.js",
"types": "dist/index.d.ts",
"scripts": {
"test": "mocha",
"compile": "tsc",
"build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
"compile:cjs": "tsc -m commonjs",
"compile:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
"lint": "eslint ./src/*.ts ./src/**/*.ts",
"clean": "del-cli ./dist/",
"ts:cjs": "tsc -m commonjs",
"ts:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
"ts:esm": "tsc -t es2019 -m esnext -d false --outDir ./dist/esm/",
"build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
"dev": "tsc -w & mocha -w ./test/*.js",
"pretest": "tsc -m commonjs",
"release": "yarn build && np",
"prepare": "npm run build"
"---------------": "",
"test": "mocha ./test/tests/**/*.js",
"test:src": "cross-env TEST_TARGET=src mocha ./test/tests",
"test:dist": "cross-env TEST_TARGET=dist mocha ./test/tests",
"benchmark": "node ./test/benchmark/compare.mjs",
"--------------- ": "",
"clean": "npx rimraf ./dist/",
"clean:global": "yarn run clean && npx rimraf yarn.lock test/yarn.lock test/node_modules node_modules",
"reset": "yarn run clean:global && yarn install && yarn build",
"--------------- ": "",
"posttest": "yarn run benchmark",
"prepare": "cd test && yarn install"
},

@@ -27,5 +32,13 @@ "keywords": [

],
"files": [
"dist",
"esm",
"README.md",
"LICENSE",
"CHANGELOG.md"
],
"author": "Xiaoyi Shi <ashi009@gmail.com>",
"contributors": [
"taoqf<tao_qiufeng@126.com>"
"taoqf <tao_qiufeng@126.com>",
"Ron S. <ron@nonara.com>"
],

@@ -49,4 +62,4 @@ "license": "MIT",

"cheerio": "^1.0.0-rc.5",
"del-cli": "latest",
"eslint": "latest",
"rimraf": "^3.0.2",
"eslint": "^7.32.0",
"eslint-config-prettier": "latest",

@@ -64,4 +77,7 @@ "eslint-plugin-import": "latest",

"spec": "latest",
"standard-version": "^9.3.1",
"travis-cov": "latest",
"typescript": "next"
"ts-node": "^10.2.1",
"typescript": "latest",
"cross-env": "^7.0.3"
},

@@ -90,3 +106,7 @@ "config": {

"homepage": "https://github.com/taoqf/node-fast-html-parser",
"sideEffects": false
"sideEffects": false,
"exports": {
"require": "./dist/index.js",
"import": "./esm/index.js"
}
}

@@ -1,2 +0,2 @@

# Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://travis-ci.org/taoqf/node-html-parser.svg?branch=master)](https://travis-ci.org/taoqf/node-html-parser)
# Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2Ftaoqf%2Fnode-html-parser%2Fbadge%3Fref%3Dmaster&style=flat)](https://actions-badge.atrox.dev/taoqf/node-html-parser/goto?ref=master)

@@ -81,3 +81,3 @@ Fast HTML Parser is a _very fast_ HTML parser. Which will generate a simplified

lowerCaseTagName: false, // convert tag name to lower case (hurt performance heavily)
comment: false // retrieve comments (hurt performance slightly)
comment: false, // retrieve comments (hurt performance slightly)
blockTextElements: {

@@ -116,2 +116,8 @@ script: true, // keep text content when parsing

### HTMLElement#getElementsByTagName(tagName)
Get all elements with the specified tagName.
Note: * for all elements.
### HTMLElement#closest(selector)

@@ -204,3 +210,3 @@

Get escpaed (as-it) text value of current node and its children. May have
Get escaped (as-it) text value of current node and its children. May have
`&amp;` in it. (fast)

@@ -258,2 +264,2 @@

Corresponding source code start and end indexes (ie [ 0, 40 ])
Corresponding source code start and end indexes (ie [ 0, 40 ])
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc