Socket
Socket
Sign in | Demo | Install

node-html-parser

Package Overview
Dependencies
Maintainers
1
Versions
119
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-html-parser - npm Package Compare versions

Comparing version 2.2.1 to 3.0.0

322

dist/esm/matcher.js

@@ -1,241 +0,101 @@

/**
* Cache to store generated match functions
* @type {Object}
*/
let pMatchFunctionCache = {};
function compare_tagname(tag1, tag2) {
if (!tag1) {
return !tag2;
}
if (!tag2) {
return !tag1;
}
return tag1.toLowerCase() === tag2.toLowerCase();
import NodeType from './nodes/type';
/**
 * Tells whether a node is an element node.
 *
 * @param node node whose `nodeType` is inspected
 * @return true when `node.nodeType` is strictly equal to `NodeType.ELEMENT_NODE`
 */
function isTag(node) {
    const { nodeType } = node;
    return nodeType === NodeType.ELEMENT_NODE;
}
/**
* Function cache
*/
const functionCache = {
f145(el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
if (el.id !== tagName.substr(1)) {
return false;
}
for (let cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
function getAttributeValue(elem, name) {
return elem.getAttribute(name);
}
/**
 * Returns the element's raw tag name converted to lower case.
 *
 * @param elem element whose `rawTagName` is read
 * @return lower-cased `rawTagName`, or '' when `rawTagName` is falsy
 *         (e.g. null, undefined or an empty string)
 */
function getName(elem) {
    const { rawTagName } = elem;
    const name = rawTagName || '';
    return name.toLowerCase();
}
/**
 * Returns the node's `childNodes` property as-is (no copy is made).
 *
 * @param node node whose children are requested
 * @return the value of `node.childNodes`
 */
function getChildren(node) {
    const { childNodes } = node;
    return childNodes;
}
/**
 * Returns the node's `parentNode` property.
 *
 * @param node node whose parent is requested
 * @return the value of `node.parentNode`
 */
function getParent(node) {
    const parent = node.parentNode;
    return parent;
}
/**
 * Returns the node's `text` property.
 *
 * @param node node whose text is requested
 * @return the value of `node.text`
 */
function getText(node) {
    const { text } = node;
    return text;
}
function removeSubsets(nodes) {
let idx = nodes.length;
let node;
let ancestor;
let replace;
// Check if each node (or one of its ancestors) is already contained in the
// array.
while (--idx > -1) {
node = ancestor = nodes[idx];
// Temporarily remove the node under consideration
nodes[idx] = null;
replace = true;
while (ancestor) {
if (nodes.indexOf(ancestor) > -1) {
replace = false;
nodes.splice(idx, 1);
break;
}
ancestor = getParent(ancestor);
}
return true;
},
f45(el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
for (let cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
}
// If the node has been found to be unique, re-insert it.
if (replace) {
nodes[idx] = node;
}
return true;
},
f15(el, tagName) {
'use strict';
tagName = tagName || '';
if (el.id !== tagName.substr(1)) {
return false;
}
return nodes;
}
/**
 * Checks whether `test` passes for at least one element node in `elems`
 * or anywhere beneath one of them (depth-first, via recursion).
 *
 * Nodes for which `isTag` is false are skipped together with their subtree.
 *
 * @param test predicate invoked with each element node
 * @param elems array of nodes to search
 * @return true as soon as one element satisfies `test`, otherwise false
 */
function existsOne(test, elems) {
    return elems.some((elem) => {
        // Non-element nodes never match and are not descended into.
        if (!isTag(elem)) {
            return false;
        }
        return test(elem) || existsOne(test, getChildren(elem));
    });
}
/**
 * Returns the children of the node's parent (which include the node itself).
 *
 * @param node node whose siblings are requested
 * @return `getChildren(parent)` when the parent is truthy; otherwise the
 *         falsy parent value itself (e.g. null or undefined)
 */
function getSiblings(node) {
    const parent = getParent(node);
    if (!parent) {
        // Preserve the exact falsy value instead of normalising it.
        return parent;
    }
    return getChildren(parent);
}
/**
 * Tells whether an element has the given attribute.
 *
 * @param elem element to inspect
 * @param name attribute name
 * @return true unless `getAttributeValue(elem, name)` is exactly `undefined`
 *         (a `null` or empty-string value therefore counts as present)
 */
function hasAttrib(elem, name) {
    const value = getAttributeValue(elem, name);
    return value !== undefined;
}
function findOne(test, elems) {
let elem = null;
for (let i = 0, l = elems.length; i < l && !elem; i++) {
const el = elems[i];
if (test(el)) {
elem = el;
}
return true;
},
f1(el, tagName) {
'use strict';
tagName = tagName || '';
if (el.id !== tagName.substr(1)) {
return false;
}
},
f5() {
'use strict';
return true;
},
f55(el, tagName, classes, attr_key) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = attr_key || '';
const attrs = el.attributes;
return attrs.hasOwnProperty(attr_key);
},
f245(el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
const attrs = el.attributes;
return Object.keys(attrs).some((key) => {
const val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
// for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}}
// return true;
},
f25(el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
const attrs = el.attributes;
return Object.keys(attrs).some((key) => {
const val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
// return true;
},
f2(el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
const attrs = el.attributes;
return Object.keys(attrs).some((key) => {
const val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
},
f345(el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
if (!compare_tagname(el.tagName, tagName)) {
return false;
}
for (let cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
else {
const childs = getChildren(el);
if (childs && childs.length > 0) {
elem = findOne(test, childs);
}
}
return true;
},
f35(el, tagName) {
'use strict';
tagName = tagName || '';
return compare_tagname(el.tagName, tagName);
},
f3(el, tagName) {
'use strict';
tagName = tagName || '';
// if (el.tagName !== tagName) {
// return false;
// }
return compare_tagname(el.tagName, tagName);
}
};
/**
* Matcher class to make CSS match
*
* @class Matcher
*/
export default class Matcher {
/**
* Creates an instance of Matcher.
* @param {string} selector
*
* @memberof Matcher
*/
constructor(selector) {
this.nextMatch = 0;
this.matchers = selector.split(' ').map((matcher) => {
if (pMatchFunctionCache[matcher]) {
return pMatchFunctionCache[matcher];
}
const parts = matcher.split('.');
const tagName = parts[0];
const classes = parts.slice(1).sort();
// let source = '"use strict";';
let function_name = 'f';
let attr_key = '';
let value = '';
if (tagName && tagName !== '*') {
if (tagName.startsWith('#')) {
// source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1
function_name += '1';
}
else {
// https://github.com/taoqf/node-html-parser/issues/86
// const reg = /\[\s*([\w-]+)(\s*=\s*(((?<quote>'|")\s*(.*)(\k<quote>))|(\S*)))?\s*\]/.exec(tagName);
// `[a-b]`,`[ a-b ]`,`[a-b=c]`, `[a-b=c'd]`,`[a-b='c\' d"e ']`,`[ a-b = 'c\' d"e ' ]`,`[a-b="c' d\"e " ]`,`[ a-b = "c' d\"e " ]`
const reg = /\[\s*([\w-]+)(\s*=\s*(('\s*(.*)'|"\s*(.*)")|(\S*)))?\s*\]/.exec(tagName);
if (reg) {
attr_key = reg[1];
value = reg[5] || reg[6] || reg[7];
// source += `let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == "${attr_key}" && val == "${value}"){return true;}} return false;`;// 2
function_name += '2';
}
else {
// source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;';// 3
function_name += '3';
}
}
}
if (classes.length > 0) {
// source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;';// 4
function_name += '4';
}
// source += 'return true;';// 5
function_name += '5';
const obj = {
func: functionCache[function_name],
tagName: tagName || '',
classes: classes || '',
attr_key: attr_key || '',
value: value || ''
};
// source = source || '';
return (pMatchFunctionCache[matcher] = obj);
});
return elem;
}
function findAll(test, nodes) {
let result = [];
for (let i = 0, j = nodes.length; i < j; i++) {
if (!isTag(nodes[i]))
continue;
if (test(nodes[i]))
result.push(nodes[i]);
const childs = getChildren(nodes[i]);
if (childs)
result = result.concat(findAll(test, childs));
}
/**
* Trying to advance match pointer
* @param {HTMLElement} el element to make the match
* @return {bool} true when pointer advanced.
*/
advance(el) {
if (this.nextMatch < this.matchers.length &&
this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) {
this.nextMatch++;
return true;
}
return false;
}
/**
* Rewind the match pointer
*/
rewind() {
this.nextMatch--;
}
/**
* Trying to determine if match made.
* @return {bool} true when the match is made
*/
get matched() {
return this.nextMatch === this.matchers.length;
}
/**
* Reset match pointer.
* @return {[type]} [description]
*/
reset() {
this.nextMatch = 0;
}
/**
* flush cache to free memory
*/
flushCache() {
pMatchFunctionCache = {};
}
return result;
}
// Default export: the node-accessor helpers defined above, bundled into a
// single object keyed by their own names.
// NOTE(review): the html.js hunk passes an imported `Matcher` as the `adapter`
// option of css-select's selectAll/selectOne; presumably that is this
// object - confirm against the import in nodes/html.
export default {
isTag,
getAttributeValue,
getName,
getChildren,
getParent,
getText,
removeSubsets,
existsOne,
getSiblings,
hasAttrib,
findOne,
findAll
};
import Node from './node';
import NodeType from './type';
export default class CommentNode extends Node {
constructor(rawText) {
super();
constructor(rawText, parentNode) {
super(parentNode);
this.rawText = rawText;

@@ -7,0 +7,0 @@ /**

import he from 'he';
import { selectAll, selectOne } from 'css-select';
import Node from './node';

@@ -44,4 +45,4 @@ import NodeType from './type';

*/
constructor(tagName, keyAttrs, rawAttrs = '', parentNode = null) {
super();
constructor(tagName, keyAttrs, rawAttrs = '', parentNode) {
super(parentNode);
this.rawAttrs = rawAttrs;

@@ -126,3 +127,3 @@ this.parentNode = parentNode;

set textContent(val) {
const content = [new TextNode(val)];
const content = [new TextNode(val, this)];
this.childNodes = content;

@@ -204,3 +205,3 @@ }

const r = parse(content, options);
content = r.childNodes.length ? r.childNodes : [new TextNode(content)];
content = r.childNodes.length ? r.childNodes : [new TextNode(content, this)];
}

@@ -288,60 +289,66 @@ this.childNodes = content;

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement[]} matching elements
*/
querySelectorAll(selector) {
let matcher;
if (selector instanceof Matcher) {
matcher = selector;
matcher.reset();
}
else {
if (selector.includes(',')) {
const selectors = selector.split(',');
return Array.from(selectors.reduce((pre, cur) => {
const result = this.querySelectorAll(cur.trim());
return result.reduce((p, c) => {
return p.add(c);
}, pre);
}, new Set()));
}
matcher = new Matcher(selector);
}
const stack = [];
return this.childNodes.reduce((res, cur) => {
stack.push([cur, 0, false]);
while (stack.length) {
const state = arr_back(stack); // get last element
const el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== NodeType.ELEMENT_NODE) {
stack.pop();
continue;
}
const html_el = el;
state[2] = matcher.advance(html_el);
if (state[2]) {
if (matcher.matched) {
res.push(html_el);
res.push(...(html_el.querySelectorAll(selector)));
// no need to go further.
matcher.rewind();
stack.pop();
continue;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
if (state[2]) {
matcher.rewind();
}
stack.pop();
}
}
return res;
}, []);
return selectAll(selector, this, {
xmlMode: true,
adapter: Matcher
});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// if (selector.includes(',')) {
// const selectors = selector.split(',');
// return Array.from(selectors.reduce((pre, cur) => {
// const result = this.querySelectorAll(cur.trim());
// return result.reduce((p, c) => {
// return p.add(c);
// }, pre);
// }, new Set<HTMLElement>()));
// }
// matcher = new Matcher(selector);
// }
// interface IStack {
// 0: Node; // node
// 1: number; // children
// 2: boolean; // found flag
// }
// const stack = [] as IStack[];
// return this.childNodes.reduce((res, cur) => {
// stack.push([cur, 0, false]);
// while (stack.length) {
// const state = arr_back(stack); // get last element
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// const html_el = el as HTMLElement;
// state[2] = matcher.advance(html_el);
// if (state[2]) {
// if (matcher.matched) {
// res.push(html_el);
// res.push(...(html_el.querySelectorAll(selector)));
// // no need to go further.
// matcher.rewind();
// stack.pop();
// continue;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// return res;
// }, [] as HTMLElement[]);
}

@@ -351,45 +358,46 @@ /**

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement} matching node
*/
querySelector(selector) {
let matcher;
if (selector instanceof Matcher) {
matcher = selector;
matcher.reset();
}
else {
matcher = new Matcher(selector);
}
const stack = [];
for (const node of this.childNodes) {
stack.push([node, 0, false]);
while (stack.length) {
const state = arr_back(stack);
const el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== NodeType.ELEMENT_NODE) {
stack.pop();
continue;
}
state[2] = matcher.advance(el);
if (state[2]) {
if (matcher.matched) {
return el;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
if (state[2]) {
matcher.rewind();
}
stack.pop();
}
}
}
return null;
return selectOne(selector, this, {
xmlMode: true,
adapter: Matcher
});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// matcher = new Matcher(selector);
// }
// const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
// for (const node of this.childNodes) {
// stack.push([node, 0, false]);
// while (stack.length) {
// const state = arr_back(stack);
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// state[2] = matcher.advance(el as HTMLElement);
// if (state[2]) {
// if (matcher.matched) {
// return el as HTMLElement;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// }
// return null;
}

@@ -404,5 +412,3 @@ /**

this.childNodes.push(node);
if (node instanceof HTMLElement) {
node.parentNode = this;
}
node.parentNode = this;
return node;

@@ -733,3 +739,3 @@ }

}
const root = new HTMLElement(null, {});
const root = new HTMLElement(null, {}, '', null);
let currentParent = root;

@@ -746,3 +752,3 @@ const stack = [root];

const text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
currentParent.appendChild(new TextNode(text));
currentParent.appendChild(new TextNode(text, currentParent));
}

@@ -759,3 +765,3 @@ }

const text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
currentParent.appendChild(new CommentNode(text));
currentParent.appendChild(new CommentNode(text, currentParent));
}

@@ -782,3 +788,3 @@ continue;

// https://github.com/taoqf/node-html-parser/issues/38
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3], null));
stack.push(currentParent);

@@ -804,3 +810,3 @@ if (is_block_text_element(match[2])) {

if (text.length > 0) {
currentParent.appendChild(new TextNode(text));
currentParent.appendChild(new TextNode(text, currentParent));
}

@@ -807,0 +813,0 @@ }

@@ -5,3 +5,4 @@ /**

export default class Node {
constructor() {
constructor(parentNode = null) {
this.parentNode = parentNode;
this.childNodes = [];

@@ -8,0 +9,0 @@ }

@@ -8,4 +8,4 @@ import NodeType from './type';

export default class TextNode extends Node {
constructor(rawText) {
super();
constructor(rawText, parentNode) {
super(parentNode);
this.rawText = rawText;

@@ -12,0 +12,0 @@ /**

@@ -50,3 +50,5 @@ var __extends = (this && this.__extends) || (function () {

var Node = /** @class */ (function () {
function Node() {
function Node(parentNode) {
if (parentNode === void 0) { parentNode = null; }
this.parentNode = parentNode;
this.childNodes = [];

@@ -82,4 +84,4 @@ }

__extends(CommentNode, _super);
function CommentNode(rawText) {
var _this = _super.call(this) || this;
function CommentNode(rawText, parentNode) {
var _this = _super.call(this, parentNode) || this;
_this.rawText = rawText;

@@ -122,4 +124,4 @@ /**

__extends(TextNode, _super);
function TextNode(rawText) {
var _this = _super.call(this) || this;
function TextNode(rawText, parentNode) {
var _this = _super.call(this, parentNode) || this;
_this.rawText = rawText;

@@ -162,252 +164,106 @@ /**

});
define("matcher", ["require", "exports"], function (require, exports) {
define("matcher", ["require", "exports", "nodes/type"], function (require, exports, type_3) {
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
/**
* Cache to store generated match functions
* @type {Object}
*/
var pMatchFunctionCache = {};
function compare_tagname(tag1, tag2) {
if (!tag1) {
return !tag2;
}
if (!tag2) {
return !tag1;
}
return tag1.toLowerCase() === tag2.toLowerCase();
type_3 = __importDefault(type_3);
/**
 * Tells whether a node is an element node.
 *
 * @param node node whose `nodeType` is inspected
 * @return true when `node.nodeType` is strictly equal to
 *         `type_3.default.ELEMENT_NODE`
 */
function isTag(node) {
    var kind = node.nodeType;
    return kind === type_3.default.ELEMENT_NODE;
}
/**
* Function cache
*/
var functionCache = {
f145: function (el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
if (el.id !== tagName.substr(1)) {
return false;
}
for (var cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
function getAttributeValue(elem, name) {
return elem.getAttribute(name);
}
/**
 * Returns the element's raw tag name converted to lower case.
 *
 * @param elem element whose `rawTagName` is read
 * @return lower-cased `rawTagName`, or '' when `rawTagName` is falsy
 */
function getName(elem) {
    var rawName = elem.rawTagName || '';
    return rawName.toLowerCase();
}
/**
 * Returns the node's `childNodes` property as-is (no copy is made).
 *
 * @param node node whose children are requested
 * @return the value of `node.childNodes`
 */
function getChildren(node) {
    var children = node.childNodes;
    return children;
}
/**
 * Returns the node's `parentNode` property.
 *
 * @param node node whose parent is requested
 * @return the value of `node.parentNode`
 */
function getParent(node) {
    var parent = node.parentNode;
    return parent;
}
/**
 * Returns the node's `text` property.
 *
 * @param node node whose text is requested
 * @return the value of `node.text`
 */
function getText(node) {
    var content = node.text;
    return content;
}
function removeSubsets(nodes) {
var idx = nodes.length;
var node;
var ancestor;
var replace;
// Check if each node (or one of its ancestors) is already contained in the
// array.
while (--idx > -1) {
node = ancestor = nodes[idx];
// Temporarily remove the node under consideration
nodes[idx] = null;
replace = true;
while (ancestor) {
if (nodes.indexOf(ancestor) > -1) {
replace = false;
nodes.splice(idx, 1);
break;
}
ancestor = getParent(ancestor);
}
return true;
},
f45: function (el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
for (var cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
}
// If the node has been found to be unique, re-insert it.
if (replace) {
nodes[idx] = node;
}
return true;
},
f15: function (el, tagName) {
'use strict';
tagName = tagName || '';
if (el.id !== tagName.substr(1)) {
return false;
}
return nodes;
}
/**
 * Checks whether `test` passes for at least one element node in `elems`
 * or anywhere beneath one of them (depth-first, via recursion).
 *
 * Nodes for which `isTag` is false are skipped together with their subtree.
 *
 * @param test predicate invoked with each element node
 * @param elems array of nodes to search
 * @return true as soon as one element satisfies `test`, otherwise false
 */
function existsOne(test, elems) {
    return elems.some(function (elem) {
        // Non-element nodes never match and are not descended into.
        if (!isTag(elem)) {
            return false;
        }
        return test(elem) || existsOne(test, getChildren(elem));
    });
}
/**
 * Returns the children of the node's parent (which include the node itself).
 *
 * @param node node whose siblings are requested
 * @return `getChildren(parent)` when the parent is truthy; otherwise the
 *         falsy parent value itself (e.g. null or undefined)
 */
function getSiblings(node) {
    var parent = getParent(node);
    if (!parent) {
        // Preserve the exact falsy value instead of normalising it.
        return parent;
    }
    return getChildren(parent);
}
/**
 * Tells whether an element has the given attribute.
 *
 * @param elem element to inspect
 * @param name attribute name
 * @return true unless `getAttributeValue(elem, name)` is exactly `undefined`
 *         (a `null` or empty-string value therefore counts as present)
 */
function hasAttrib(elem, name) {
    var value = getAttributeValue(elem, name);
    return value !== undefined;
}
function findOne(test, elems) {
var elem = null;
for (var i = 0, l = elems.length; i < l && !elem; i++) {
var el = elems[i];
if (test(el)) {
elem = el;
}
return true;
},
f1: function (el, tagName) {
'use strict';
tagName = tagName || '';
if (el.id !== tagName.substr(1)) {
return false;
}
},
f5: function () {
'use strict';
return true;
},
f55: function (el, tagName, classes, attr_key) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = attr_key || '';
var attrs = el.attributes;
return attrs.hasOwnProperty(attr_key);
},
f245: function (el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
var attrs = el.attributes;
return Object.keys(attrs).some(function (key) {
var val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
// for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}}
// return true;
},
f25: function (el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
var attrs = el.attributes;
return Object.keys(attrs).some(function (key) {
var val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
// return true;
},
f2: function (el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
var attrs = el.attributes;
return Object.keys(attrs).some(function (key) {
var val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
},
f345: function (el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
if (!compare_tagname(el.tagName, tagName)) {
return false;
}
for (var cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
else {
var childs = getChildren(el);
if (childs && childs.length > 0) {
elem = findOne(test, childs);
}
}
return true;
},
f35: function (el, tagName) {
'use strict';
tagName = tagName || '';
return compare_tagname(el.tagName, tagName);
},
f3: function (el, tagName) {
'use strict';
tagName = tagName || '';
// if (el.tagName !== tagName) {
// return false;
// }
return compare_tagname(el.tagName, tagName);
}
return elem;
}
function findAll(test, nodes) {
var result = [];
for (var i = 0, j = nodes.length; i < j; i++) {
if (!isTag(nodes[i]))
continue;
if (test(nodes[i]))
result.push(nodes[i]);
var childs = getChildren(nodes[i]);
if (childs)
result = result.concat(findAll(test, childs));
}
return result;
}
// Default export of the "matcher" module: the node-accessor helpers defined
// above, bundled into a single object keyed by their own names.
// NOTE(review): "nodes/html" passes `matcher_1.default` as the `adapter`
// option of css-select's selectAll/selectOne; presumably this object is what
// it receives - confirm.
exports.default = {
isTag: isTag,
getAttributeValue: getAttributeValue,
getName: getName,
getChildren: getChildren,
getParent: getParent,
getText: getText,
removeSubsets: removeSubsets,
existsOne: existsOne,
getSiblings: getSiblings,
hasAttrib: hasAttrib,
findOne: findOne,
findAll: findAll
};
/**
* Matcher class to make CSS match
*
* @class Matcher
*/
var Matcher = /** @class */ (function () {
/**
* Creates an instance of Matcher.
* @param {string} selector
*
* @memberof Matcher
*/
function Matcher(selector) {
this.nextMatch = 0;
this.matchers = selector.split(' ').map(function (matcher) {
if (pMatchFunctionCache[matcher]) {
return pMatchFunctionCache[matcher];
}
var parts = matcher.split('.');
var tagName = parts[0];
var classes = parts.slice(1).sort();
// let source = '"use strict";';
var function_name = 'f';
var attr_key = '';
var value = '';
if (tagName && tagName !== '*') {
if (tagName.startsWith('#')) {
// source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1
function_name += '1';
}
else {
// https://github.com/taoqf/node-html-parser/issues/86
// const reg = /\[\s*([\w-]+)(\s*=\s*(((?<quote>'|")\s*(.*)(\k<quote>))|(\S*)))?\s*\]/.exec(tagName);
// `[a-b]`,`[ a-b ]`,`[a-b=c]`, `[a-b=c'd]`,`[a-b='c\' d"e ']`,`[ a-b = 'c\' d"e ' ]`,`[a-b="c' d\"e " ]`,`[ a-b = "c' d\"e " ]`
var reg = /\[\s*([\w-]+)(\s*=\s*(('\s*(.*)'|"\s*(.*)")|(\S*)))?\s*\]/.exec(tagName);
if (reg) {
attr_key = reg[1];
value = reg[5] || reg[6] || reg[7];
// source += `let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == "${attr_key}" && val == "${value}"){return true;}} return false;`;// 2
function_name += '2';
}
else {
// source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;';// 3
function_name += '3';
}
}
}
if (classes.length > 0) {
// source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;';// 4
function_name += '4';
}
// source += 'return true;';// 5
function_name += '5';
var obj = {
func: functionCache[function_name],
tagName: tagName || '',
classes: classes || '',
attr_key: attr_key || '',
value: value || ''
};
// source = source || '';
return (pMatchFunctionCache[matcher] = obj);
});
}
/**
* Trying to advance match pointer
* @param {HTMLElement} el element to make the match
* @return {bool} true when pointer advanced.
*/
Matcher.prototype.advance = function (el) {
if (this.nextMatch < this.matchers.length &&
this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) {
this.nextMatch++;
return true;
}
return false;
};
/**
* Rewind the match pointer
*/
Matcher.prototype.rewind = function () {
this.nextMatch--;
};
Object.defineProperty(Matcher.prototype, "matched", {
/**
* Trying to determine if match made.
* @return {bool} true when the match is made
*/
get: function () {
return this.nextMatch === this.matchers.length;
},
enumerable: false,
configurable: true
});
/**
* Reset match pointer.
* @return {[type]} [description]
*/
Matcher.prototype.reset = function () {
this.nextMatch = 0;
};
/**
* flush cache to free memory
*/
Matcher.prototype.flushCache = function () {
pMatchFunctionCache = {};
};
return Matcher;
}());
exports.default = Matcher;
});

@@ -463,3 +319,3 @@ define("parse", ["require", "exports", "back", "nodes/html"], function (require, exports, back_1, html_1) {

});
define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "nodes/text", "matcher", "back", "nodes/comment", "parse"], function (require, exports, he_1, node_3, type_3, text_1, matcher_1, back_2, comment_1, parse_1) {
define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "nodes/type", "nodes/text", "matcher", "back", "nodes/comment", "parse"], function (require, exports, he_1, css_select_1, node_3, type_4, text_1, matcher_1, back_2, comment_1, parse_1) {
"use strict";

@@ -470,3 +326,3 @@ Object.defineProperty(exports, "__esModule", { value: true });

node_3 = __importDefault(node_3);
type_3 = __importDefault(type_3);
type_4 = __importDefault(type_4);
text_1 = __importDefault(text_1);

@@ -515,4 +371,3 @@ matcher_1 = __importDefault(matcher_1);

if (rawAttrs === void 0) { rawAttrs = ''; }
if (parentNode === void 0) { parentNode = null; }
var _this = _super.call(this) || this;
var _this = _super.call(this, parentNode) || this;
_this.rawAttrs = rawAttrs;

@@ -524,3 +379,3 @@ _this.parentNode = parentNode;

*/
_this.nodeType = type_3.default.ELEMENT_NODE;
_this.nodeType = type_4.default.ELEMENT_NODE;
_this.rawTagName = tagName;

@@ -609,3 +464,3 @@ _this.rawAttrs = rawAttrs || '';

set: function (val) {
var content = [new text_1.default(val)];
var content = [new text_1.default(val, this)];
this.childNodes = content;

@@ -636,3 +491,3 @@ },

function dfs(node) {
if (node.nodeType === type_3.default.ELEMENT_NODE) {
if (node.nodeType === type_4.default.ELEMENT_NODE) {
if (kBlockElements.get(node.rawTagName)) {

@@ -651,3 +506,3 @@ if (currentBlock.length > 0) {

}
else if (node.nodeType === type_3.default.TEXT_NODE) {
else if (node.nodeType === type_4.default.TEXT_NODE) {
if (node.isWhitespace) {

@@ -705,3 +560,3 @@ // Whitespace node, postponed output

var r = parse_1.default(content, options);
content = r.childNodes.length ? r.childNodes : [new text_1.default(content)];
content = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
}

@@ -725,3 +580,3 @@ this.childNodes = content;

var childNode = this.childNodes[i];
if (childNode.nodeType === type_3.default.ELEMENT_NODE) {
if (childNode.nodeType === type_4.default.ELEMENT_NODE) {
childNode.trimRight(pattern);

@@ -757,6 +612,6 @@ }

node.childNodes.forEach(function (childNode) {
if (childNode.nodeType === type_3.default.ELEMENT_NODE) {
if (childNode.nodeType === type_4.default.ELEMENT_NODE) {
dfs(childNode);
}
else if (childNode.nodeType === type_3.default.TEXT_NODE) {
else if (childNode.nodeType === type_4.default.TEXT_NODE) {
if (!childNode.isWhitespace) {

@@ -783,3 +638,3 @@ write('#text');

this.childNodes.forEach(function (node) {
if (node.nodeType === type_3.default.TEXT_NODE) {
if (node.nodeType === type_4.default.TEXT_NODE) {
if (node.isWhitespace) {

@@ -790,3 +645,3 @@ return;

}
else if (node.nodeType === type_3.default.ELEMENT_NODE) {
else if (node.nodeType === type_4.default.ELEMENT_NODE) {
node.removeWhitespace();

@@ -802,61 +657,66 @@ }

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement[]} matching elements
*/
HTMLElement.prototype.querySelectorAll = function (selector) {
var _this = this;
var matcher;
if (selector instanceof matcher_1.default) {
matcher = selector;
matcher.reset();
}
else {
if (selector.includes(',')) {
var selectors = selector.split(',');
return Array.from(selectors.reduce(function (pre, cur) {
var result = _this.querySelectorAll(cur.trim());
return result.reduce(function (p, c) {
return p.add(c);
}, pre);
}, new Set()));
}
matcher = new matcher_1.default(selector);
}
var stack = [];
return this.childNodes.reduce(function (res, cur) {
stack.push([cur, 0, false]);
while (stack.length) {
var state = back_2.default(stack); // get last element
var el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== type_3.default.ELEMENT_NODE) {
stack.pop();
continue;
}
var html_el = el;
state[2] = matcher.advance(html_el);
if (state[2]) {
if (matcher.matched) {
res.push(html_el);
res.push.apply(res, (html_el.querySelectorAll(selector)));
// no need to go further.
matcher.rewind();
stack.pop();
continue;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
if (state[2]) {
matcher.rewind();
}
stack.pop();
}
}
return res;
}, []);
return css_select_1.selectAll(selector, this, {
xmlMode: true,
adapter: matcher_1.default
});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// if (selector.includes(',')) {
// const selectors = selector.split(',');
// return Array.from(selectors.reduce((pre, cur) => {
// const result = this.querySelectorAll(cur.trim());
// return result.reduce((p, c) => {
// return p.add(c);
// }, pre);
// }, new Set<HTMLElement>()));
// }
// matcher = new Matcher(selector);
// }
// interface IStack {
// 0: Node; // node
// 1: number; // children
// 2: boolean; // found flag
// }
// const stack = [] as IStack[];
// return this.childNodes.reduce((res, cur) => {
// stack.push([cur, 0, false]);
// while (stack.length) {
// const state = arr_back(stack); // get last element
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// const html_el = el as HTMLElement;
// state[2] = matcher.advance(html_el);
// if (state[2]) {
// if (matcher.matched) {
// res.push(html_el);
// res.push(...(html_el.querySelectorAll(selector)));
// // no need to go further.
// matcher.rewind();
// stack.pop();
// continue;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// return res;
// }, [] as HTMLElement[]);
};

@@ -866,46 +726,46 @@ /**

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement} matching node
*/
HTMLElement.prototype.querySelector = function (selector) {
var matcher;
if (selector instanceof matcher_1.default) {
matcher = selector;
matcher.reset();
}
else {
matcher = new matcher_1.default(selector);
}
var stack = [];
for (var _i = 0, _a = this.childNodes; _i < _a.length; _i++) {
var node = _a[_i];
stack.push([node, 0, false]);
while (stack.length) {
var state = back_2.default(stack);
var el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== type_3.default.ELEMENT_NODE) {
stack.pop();
continue;
}
state[2] = matcher.advance(el);
if (state[2]) {
if (matcher.matched) {
return el;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
if (state[2]) {
matcher.rewind();
}
stack.pop();
}
}
}
return null;
return css_select_1.selectOne(selector, this, {
xmlMode: true,
adapter: matcher_1.default
});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// matcher = new Matcher(selector);
// }
// const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
// for (const node of this.childNodes) {
// stack.push([node, 0, false]);
// while (stack.length) {
// const state = arr_back(stack);
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// state[2] = matcher.advance(el as HTMLElement);
// if (state[2]) {
// if (matcher.matched) {
// return el as HTMLElement;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// }
// return null;
};

@@ -920,5 +780,3 @@ /**

this.childNodes.push(node);
if (node instanceof HTMLElement) {
node.parentNode = this;
}
node.parentNode = this;
return node;

@@ -1282,3 +1140,3 @@ };

}
var root = new HTMLElement(null, {});
var root = new HTMLElement(null, {}, '', null);
var currentParent = root;

@@ -1295,3 +1153,3 @@ var stack = [root];

var text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
currentParent.appendChild(new text_1.default(text));
currentParent.appendChild(new text_1.default(text, currentParent));
}

@@ -1308,3 +1166,3 @@ }

var text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
currentParent.appendChild(new comment_1.default(text));
currentParent.appendChild(new comment_1.default(text, currentParent));
}

@@ -1331,3 +1189,3 @@ return "continue";

// https://github.com/taoqf/node-html-parser/issues/38
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3], null));
stack.push(currentParent);

@@ -1353,3 +1211,3 @@ if (is_block_text_element(match[2])) {

if (text.length > 0) {
currentParent.appendChild(new text_1.default(text));
currentParent.appendChild(new text_1.default(text, currentParent));
}

@@ -1411,3 +1269,3 @@ }

});
define("index", ["require", "exports", "nodes/comment", "nodes/html", "parse", "valid", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, comment_2, html_3, parse_2, valid_1, node_4, text_2, type_4) {
define("index", ["require", "exports", "nodes/comment", "nodes/html", "parse", "valid", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, comment_2, html_3, parse_2, valid_1, node_4, text_2, type_5) {
"use strict";

@@ -1423,3 +1281,3 @@ Object.defineProperty(exports, "__esModule", { value: true });

Object.defineProperty(exports, "TextNode", { enumerable: true, get: function () { return __importDefault(text_2).default; } });
Object.defineProperty(exports, "NodeType", { enumerable: true, get: function () { return __importDefault(type_4).default; } });
Object.defineProperty(exports, "NodeType", { enumerable: true, get: function () { return __importDefault(type_5).default; } });
});

@@ -0,41 +1,6 @@

import { Adapter } from 'css-select/lib/types';
import HTMLElement from './nodes/html';
/**
* Step-by-step matcher for a simplified CSS selector.
*
* Holds the list of per-step matchers built from the selector string and a
* pointer to the step that has to match next.
*
* @class Matcher
*/
export default class Matcher {
private matchers;
private nextMatch;
/**
* Creates an instance of Matcher.
* @param {string} selector Simplified CSS selector
*
* @memberof Matcher
*/
constructor(selector: string);
/**
* Try to advance the match pointer.
* @param {HTMLElement} el element to test against the current step
* @return {boolean} true when the pointer advanced.
*/
advance(el: HTMLElement): boolean;
/**
* Rewind the match pointer by one step.
*/
rewind(): void;
/**
* Tell whether every step of the selector has been matched.
* @return {boolean} true when the match is made
*/
get matched(): boolean;
/**
* Reset the match pointer to the first step.
*/
reset(): void;
/**
* Flush the cache to free memory.
*/
flushCache(): void;
}
import Node from './nodes/node';
export declare type Predicate = (node: Node) => node is HTMLElement;
declare const _default: Adapter<Node, HTMLElement>;
export default _default;
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/**
* Cache to store generated match functions
* @type {Object}
*/
var pMatchFunctionCache = {};
function compare_tagname(tag1, tag2) {
if (!tag1) {
return !tag2;
}
if (!tag2) {
return !tag1;
}
return tag1.toLowerCase() === tag2.toLowerCase();
var type_1 = __importDefault(require("./nodes/type"));
function isTag(node) {
return node.nodeType === type_1.default.ELEMENT_NODE;
}
/**
* Function cache
*/
var functionCache = {
f145: function (el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
if (el.id !== tagName.substr(1)) {
return false;
}
for (var cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
function getAttributeValue(elem, name) {
return elem.getAttribute(name);
}
function getName(elem) {
return (elem.rawTagName || '').toLowerCase();
}
function getChildren(node) {
return node.childNodes;
}
function getParent(node) {
return node.parentNode;
}
function getText(node) {
return node.text;
}
function removeSubsets(nodes) {
var idx = nodes.length;
var node;
var ancestor;
var replace;
// Check if each node (or one of its ancestors) is already contained in the
// array.
while (--idx > -1) {
node = ancestor = nodes[idx];
// Temporarily remove the node under consideration
nodes[idx] = null;
replace = true;
while (ancestor) {
if (nodes.indexOf(ancestor) > -1) {
replace = false;
nodes.splice(idx, 1);
break;
}
ancestor = getParent(ancestor);
}
return true;
},
f45: function (el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
for (var cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
}
// If the node has been found to be unique, re-insert it.
if (replace) {
nodes[idx] = node;
}
return true;
},
f15: function (el, tagName) {
'use strict';
tagName = tagName || '';
if (el.id !== tagName.substr(1)) {
return false;
}
return nodes;
}
/**
 * Tell whether `test` accepts at least one element in `elems` or in any of
 * their descendants. Nodes that are not elements are skipped together with
 * their subtrees.
 *
 * @param {Function} test predicate called with each element
 * @param {Array} elems nodes to search
 * @return {boolean} true as soon as one element passes
 */
function existsOne(test, elems) {
    return elems.some(function (candidate) {
        if (!isTag(candidate)) {
            return false;
        }
        // Check the element itself first, then descend into its children.
        return test(candidate) || existsOne(test, getChildren(candidate));
    });
}
/**
 * Return the children of `node`'s parent (which include `node` itself).
 * When the node has no parent, the falsy parent value is returned as-is.
 *
 * @param {Object} node node whose siblings are wanted
 * @return {Array|null|undefined} the parent's child list, or the falsy parent
 */
function getSiblings(node) {
    var owner = getParent(node);
    if (!owner) {
        return owner;
    }
    return getChildren(owner);
}
/**
 * Tell whether `elem` carries the attribute `name`.
 * Only an `undefined` attribute value counts as "absent"; an empty string or
 * `null` value still counts as present.
 *
 * @param {Object} elem element to inspect
 * @param {string} name attribute name
 * @return {boolean} true when the attribute value is not `undefined`
 */
function hasAttrib(elem, name) {
    var attributeValue = getAttributeValue(elem, name);
    return attributeValue !== undefined;
}
function findOne(test, elems) {
var elem = null;
for (var i = 0, l = elems.length; i < l && !elem; i++) {
var el = elems[i];
if (test(el)) {
elem = el;
}
return true;
},
f1: function (el, tagName) {
'use strict';
tagName = tagName || '';
if (el.id !== tagName.substr(1)) {
return false;
}
},
f5: function () {
'use strict';
return true;
},
f55: function (el, tagName, classes, attr_key) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = attr_key || '';
var attrs = el.attributes;
return attrs.hasOwnProperty(attr_key);
},
f245: function (el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
var attrs = el.attributes;
return Object.keys(attrs).some(function (key) {
var val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
// for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}}
// return true;
},
f25: function (el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
var attrs = el.attributes;
return Object.keys(attrs).some(function (key) {
var val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
// return true;
},
f2: function (el, tagName, classes, attr_key, value) {
'use strict';
tagName = tagName || '';
classes = classes || [];
attr_key = (attr_key || '').toLowerCase();
value = value || '';
var attrs = el.attributes;
return Object.keys(attrs).some(function (key) {
var val = attrs[key];
return key.toLowerCase() === attr_key && val === value;
});
},
f345: function (el, tagName, classes) {
'use strict';
tagName = tagName || '';
classes = classes || [];
if (!compare_tagname(el.tagName, tagName)) {
return false;
}
for (var cls = classes, i = 0; i < cls.length; i++) {
if (el.classNames.indexOf(cls[i]) === -1) {
return false;
else {
var childs = getChildren(el);
if (childs && childs.length > 0) {
elem = findOne(test, childs);
}
}
return true;
},
f35: function (el, tagName) {
'use strict';
tagName = tagName || '';
return compare_tagname(el.tagName, tagName);
},
f3: function (el, tagName) {
'use strict';
tagName = tagName || '';
// if (el.tagName !== tagName) {
// return false;
// }
return compare_tagname(el.tagName, tagName);
}
return elem;
}
/**
 * Collect every element in `nodes`, and in their descendants, that satisfies
 * `test`. Results are in pre-order (an element comes before its descendants).
 * Nodes that are not elements are skipped together with their subtrees.
 *
 * @param {Function} test predicate called with each element
 * @param {Array} nodes nodes to search
 * @return {Array} the matching elements
 */
function findAll(test, nodes) {
    var matches = [];
    // Depth-first walk that appends into the shared `matches` array.
    var walk = function (list) {
        var total = list.length;
        for (var pos = 0; pos < total; pos++) {
            var current = list[pos];
            if (isTag(current)) {
                if (test(current)) {
                    matches.push(current);
                }
                var kids = getChildren(current);
                if (kids) {
                    walk(kids);
                }
            }
        }
    };
    walk(nodes);
    return matches;
}
/**
 * Default export: an adapter object that bundles this module's DOM helper
 * functions under fixed property names. Each property simply points at the
 * function of the same name defined in this module.
 * NOTE(review): the property names appear to follow the css-select adapter
 * interface — confirm against the css-select version in use.
 */
exports.default = {
isTag: isTag,
getAttributeValue: getAttributeValue,
getName: getName,
getChildren: getChildren,
getParent: getParent,
getText: getText,
removeSubsets: removeSubsets,
existsOne: existsOne,
getSiblings: getSiblings,
hasAttrib: hasAttrib,
findOne: findOne,
findAll: findAll
};
/**
 * Matcher class to make CSS match.
 *
 * The selector is split on whitespace into steps. Each step is compiled once
 * into a descriptor `{ func, tagName, classes, attr_key, value }` and cached
 * in `pMatchFunctionCache` under the step text. `nextMatch` points at the
 * step that has to match next.
 *
 * @class Matcher
 */
var Matcher = /** @class */ (function () {
    var hasOwn = Object.prototype.hasOwnProperty;
    /**
     * Creates an instance of Matcher.
     * @param {string} selector Simplified CSS selector; steps are separated
     *   by whitespace.
     *
     * @memberof Matcher
     */
    function Matcher(selector) {
        this.nextMatch = 0;
        // Split on runs of any whitespace and ignore leading/trailing
        // whitespace, so "a  b" and " a b " behave like "a b" instead of
        // producing empty steps that match every element. An empty selector
        // still yields the single step '' as before.
        // NOTE(review): whitespace inside an attribute selector, e.g.
        // `[a = "b c"]`, is still split into separate steps.
        this.matchers = selector.trim().split(/\s+/).map(function (matcher) {
            // Own-property check: a step named like an Object.prototype
            // member ("toString", "constructor") must not be mistaken for a
            // cached descriptor.
            if (hasOwn.call(pMatchFunctionCache, matcher)) {
                return pMatchFunctionCache[matcher];
            }
            var parts = matcher.split('.');
            var tagName = parts[0];
            var classes = parts.slice(1).sort();
            // The name of the match function is assembled from digits that
            // record which checks this step needs:
            //   1 = id, 2 = attribute, 3 = tag name, 4 = classes, 5 = always.
            var function_name = 'f';
            var attr_key = '';
            var value = '';
            if (tagName && tagName !== '*') {
                if (tagName.startsWith('#')) {
                    function_name += '1';
                }
                else {
                    // https://github.com/taoqf/node-html-parser/issues/86
                    // `[a-b]`,`[ a-b ]`,`[a-b=c]`, `[a-b=c'd]`,`[a-b='c\' d"e ']`,`[ a-b = 'c\' d"e ' ]`,`[a-b="c' d\"e " ]`,`[ a-b = "c' d\"e " ]`
                    var reg = /\[\s*([\w-]+)(\s*=\s*(('\s*(.*)'|"\s*(.*)")|(\S*)))?\s*\]/.exec(tagName);
                    if (reg) {
                        attr_key = reg[1];
                        // Single-quoted, double-quoted or bare value.
                        value = reg[5] || reg[6] || reg[7];
                        function_name += '2';
                    }
                    else {
                        function_name += '3';
                    }
                }
            }
            if (classes.length > 0) {
                function_name += '4';
            }
            function_name += '5';
            var obj = {
                func: functionCache[function_name],
                tagName: tagName || '',
                // `classes` is always an array (possibly empty).
                classes: classes,
                attr_key: attr_key || '',
                value: value || ''
            };
            return (pMatchFunctionCache[matcher] = obj);
        });
    }
    /**
     * Trying to advance match pointer
     * @param {HTMLElement} el element to make the match
     * @return {boolean} true when pointer advanced.
     */
    Matcher.prototype.advance = function (el) {
        if (this.nextMatch >= this.matchers.length) {
            return false;
        }
        var step = this.matchers[this.nextMatch];
        if (!step.func(el, step.tagName, step.classes, step.attr_key, step.value)) {
            return false;
        }
        this.nextMatch++;
        return true;
    };
    /**
     * Rewind the match pointer by one step. Does nothing when the pointer is
     * already at the first step, so it can never become negative.
     */
    Matcher.prototype.rewind = function () {
        if (this.nextMatch > 0) {
            this.nextMatch--;
        }
    };
    Object.defineProperty(Matcher.prototype, "matched", {
        /**
         * Trying to determine if match made.
         * @return {boolean} true when every step has matched
         */
        get: function () {
            return this.nextMatch === this.matchers.length;
        },
        enumerable: false,
        configurable: true
    });
    /**
     * Reset the match pointer to the first step.
     */
    Matcher.prototype.reset = function () {
        this.nextMatch = 0;
    };
    /**
     * Flush the cache of compiled steps to free memory.
     */
    Matcher.prototype.flushCache = function () {
        pMatchFunctionCache = {};
    };
    return Matcher;
}());
exports.default = Matcher;

@@ -5,3 +5,3 @@ import Node from './node';

rawText: string;
constructor(rawText: string);
constructor(rawText: string, parentNode: Node);
/**

@@ -8,0 +8,0 @@ * Node Type declaration.

@@ -25,4 +25,4 @@ "use strict";

__extends(CommentNode, _super);
function CommentNode(rawText) {
var _this = _super.call(this) || this;
function CommentNode(rawText, parentNode) {
var _this = _super.call(this, parentNode) || this;
_this.rawText = rawText;

@@ -29,0 +29,0 @@ /**

import Node from './node';
import NodeType from './type';
import Matcher from '../matcher';
export interface KeyAttributes {

@@ -26,3 +25,3 @@ id?: string;

private rawAttrs;
parentNode: HTMLElement;
parentNode: HTMLElement | null;
private _attrs;

@@ -44,3 +43,3 @@ private _rawAttrs;

*/
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs?: string, parentNode?: HTMLElement);
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null);
/**

@@ -102,13 +101,11 @@ * Remove current element

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement[]} matching elements
*/
querySelectorAll(selector: string | Matcher): HTMLElement[];
querySelectorAll(selector: string): HTMLElement[];
/**
* Query CSS Selector to find matching node.
* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement} matching node
*/
querySelector(selector: string | Matcher): HTMLElement;
querySelector(selector: string): HTMLElement;
/**

@@ -115,0 +112,0 @@ * Append a child node to childNodes

@@ -28,2 +28,3 @@ "use strict";

var he_1 = __importDefault(require("he"));
var css_select_1 = require("css-select");
var node_1 = __importDefault(require("./node"));

@@ -74,4 +75,3 @@ var type_1 = __importDefault(require("./type"));

if (rawAttrs === void 0) { rawAttrs = ''; }
if (parentNode === void 0) { parentNode = null; }
var _this = _super.call(this) || this;
var _this = _super.call(this, parentNode) || this;
_this.rawAttrs = rawAttrs;

@@ -167,3 +167,3 @@ _this.parentNode = parentNode;

set: function (val) {
var content = [new text_1.default(val)];
var content = [new text_1.default(val, this)];
this.childNodes = content;

@@ -261,3 +261,3 @@ },

var r = parse_1.default(content, options);
content = r.childNodes.length ? r.childNodes : [new text_1.default(content)];
content = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
}

@@ -354,61 +354,66 @@ this.childNodes = content;

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement[]} matching elements
*/
HTMLElement.prototype.querySelectorAll = function (selector) {
var _this = this;
var matcher;
if (selector instanceof matcher_1.default) {
matcher = selector;
matcher.reset();
}
else {
if (selector.includes(',')) {
var selectors = selector.split(',');
return Array.from(selectors.reduce(function (pre, cur) {
var result = _this.querySelectorAll(cur.trim());
return result.reduce(function (p, c) {
return p.add(c);
}, pre);
}, new Set()));
}
matcher = new matcher_1.default(selector);
}
var stack = [];
return this.childNodes.reduce(function (res, cur) {
stack.push([cur, 0, false]);
while (stack.length) {
var state = back_1.default(stack); // get last element
var el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== type_1.default.ELEMENT_NODE) {
stack.pop();
continue;
}
var html_el = el;
state[2] = matcher.advance(html_el);
if (state[2]) {
if (matcher.matched) {
res.push(html_el);
res.push.apply(res, (html_el.querySelectorAll(selector)));
// no need to go further.
matcher.rewind();
stack.pop();
continue;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
if (state[2]) {
matcher.rewind();
}
stack.pop();
}
}
return res;
}, []);
return css_select_1.selectAll(selector, this, {
xmlMode: true,
adapter: matcher_1.default
});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// if (selector.includes(',')) {
// const selectors = selector.split(',');
// return Array.from(selectors.reduce((pre, cur) => {
// const result = this.querySelectorAll(cur.trim());
// return result.reduce((p, c) => {
// return p.add(c);
// }, pre);
// }, new Set<HTMLElement>()));
// }
// matcher = new Matcher(selector);
// }
// interface IStack {
// 0: Node; // node
// 1: number; // children
// 2: boolean; // found flag
// }
// const stack = [] as IStack[];
// return this.childNodes.reduce((res, cur) => {
// stack.push([cur, 0, false]);
// while (stack.length) {
// const state = arr_back(stack); // get last element
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// const html_el = el as HTMLElement;
// state[2] = matcher.advance(html_el);
// if (state[2]) {
// if (matcher.matched) {
// res.push(html_el);
// res.push(...(html_el.querySelectorAll(selector)));
// // no need to go further.
// matcher.rewind();
// stack.pop();
// continue;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// return res;
// }, [] as HTMLElement[]);
};

@@ -418,46 +423,46 @@ /**

* @param {string} selector Simplified CSS selector
* @param {Matcher} selector A Matcher instance
* @return {HTMLElement} matching node
*/
HTMLElement.prototype.querySelector = function (selector) {
var matcher;
if (selector instanceof matcher_1.default) {
matcher = selector;
matcher.reset();
}
else {
matcher = new matcher_1.default(selector);
}
var stack = [];
for (var _i = 0, _a = this.childNodes; _i < _a.length; _i++) {
var node = _a[_i];
stack.push([node, 0, false]);
while (stack.length) {
var state = back_1.default(stack);
var el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== type_1.default.ELEMENT_NODE) {
stack.pop();
continue;
}
state[2] = matcher.advance(el);
if (state[2]) {
if (matcher.matched) {
return el;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
if (state[2]) {
matcher.rewind();
}
stack.pop();
}
}
}
return null;
return css_select_1.selectOne(selector, this, {
xmlMode: true,
adapter: matcher_1.default
});
// let matcher: Matcher;
// if (selector instanceof Matcher) {
// matcher = selector;
// matcher.reset();
// } else {
// matcher = new Matcher(selector);
// }
// const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
// for (const node of this.childNodes) {
// stack.push([node, 0, false]);
// while (stack.length) {
// const state = arr_back(stack);
// const el = state[0];
// if (state[1] === 0) {
// // Seen for first time.
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
// stack.pop();
// continue;
// }
// state[2] = matcher.advance(el as HTMLElement);
// if (state[2]) {
// if (matcher.matched) {
// return el as HTMLElement;
// }
// }
// }
// if (state[1] < el.childNodes.length) {
// stack.push([el.childNodes[state[1]++], 0, false]);
// } else {
// if (state[2]) {
// matcher.rewind();
// }
// stack.pop();
// }
// }
// }
// return null;
};

@@ -472,5 +477,3 @@ /**

this.childNodes.push(node);
if (node instanceof HTMLElement) {
node.parentNode = this;
}
node.parentNode = this;
return node;

@@ -834,3 +837,3 @@ };

}
var root = new HTMLElement(null, {});
var root = new HTMLElement(null, {}, '', null);
var currentParent = root;

@@ -847,3 +850,3 @@ var stack = [root];

var text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
currentParent.appendChild(new text_1.default(text));
currentParent.appendChild(new text_1.default(text, currentParent));
}

@@ -860,3 +863,3 @@ }

var text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
currentParent.appendChild(new comment_1.default(text));
currentParent.appendChild(new comment_1.default(text, currentParent));
}

@@ -883,3 +886,3 @@ return "continue";

// https://github.com/taoqf/node-html-parser/issues/38
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3], null));
stack.push(currentParent);

@@ -905,3 +908,3 @@ if (is_block_text_element(match[2])) {

if (text.length > 0) {
currentParent.appendChild(new text_1.default(text));
currentParent.appendChild(new text_1.default(text, currentParent));
}

@@ -908,0 +911,0 @@ }

@@ -6,2 +6,3 @@ import NodeType from './type';

export default abstract class Node {
parentNode: Node;
abstract nodeType: NodeType;

@@ -12,2 +13,3 @@ childNodes: Node[];

abstract toString(): string;
constructor(parentNode?: Node);
get innerText(): string;

@@ -14,0 +16,0 @@ get textContent(): string;

@@ -7,3 +7,5 @@ "use strict";

var Node = /** @class */ (function () {
function Node() {
function Node(parentNode) {
if (parentNode === void 0) { parentNode = null; }
this.parentNode = parentNode;
this.childNodes = [];

@@ -10,0 +12,0 @@ }

@@ -9,3 +9,3 @@ import NodeType from './type';

rawText: string;
constructor(rawText: string);
constructor(rawText: string, parentNode: Node);
/**

@@ -12,0 +12,0 @@ * Node Type declaration.

@@ -29,4 +29,4 @@ "use strict";

__extends(TextNode, _super);
function TextNode(rawText) {
var _this = _super.call(this) || this;
function TextNode(rawText, parentNode) {
var _this = _super.call(this, parentNode) || this;
_this.rawText = rawText;

@@ -33,0 +33,0 @@ /**

{
"name": "node-html-parser",
"version": "2.2.1",
"version": "3.0.0",
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",

@@ -31,2 +31,3 @@ "main": "dist/index.js",

"dependencies": {
"css-select": "^3.1.2",
"he": "1.2.0"

@@ -33,0 +34,0 @@ },

@@ -129,2 +129,6 @@ # Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://travis-ci.org/taoqf/node-html-parser.svg?branch=master)](https://travis-ci.org/taoqf/node-html-parser)

### HTMLElement#setAttributes(attrs: Record<string, string>)
Set attributes of the element.
### HTMLElement#removeAttribute(key: string)

@@ -209,1 +213,5 @@

Get or Set textContent of current element, more efficient than [set_content](#htmlelementset_contentcontent-string--node--node).
### HTMLElement#attributes
Get all attributes of current element. **Notice: do not try to change the returned value.**
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc