Socket
Socket
Sign inDemoInstall

node-html-parser

Package Overview
Dependencies
Maintainers
1
Versions
119
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-html-parser - npm Package Compare versions

Comparing version 1.1.20 to 1.2.0

.eslintrc.js

249

dist/index.d.ts

@@ -1,231 +0,14 @@

export declare enum NodeType {
ELEMENT_NODE = 1,
TEXT_NODE = 3,
COMMENT_NODE = 8
export { default as HTMLElement } from './nodes/html';
import HTMLElement from './nodes/html';
import TextNode from './nodes/text';
export { default as Node } from './nodes/node';
/**
* Optional switches accepted as the second argument of `parse`.
* NOTE(review): this declaration does not show what each flag does or what its
* default is; confirm against the parser implementation before relying on one.
*/
export interface Options {
lowerCaseTagName?: boolean;
noFix?: boolean;
script?: boolean;
style?: boolean;
pre?: boolean;
comment?: boolean;
}
/**
 * Abstract base class shared by TextNode, CommentNode and HTMLElement.
 */
export declare abstract class Node {
    nodeType: NodeType;
    childNodes: Node[];
    text: string;
    rawText: string;
    /**
     * Serialises the node. Every subclass supplies its own implementation.
     * Declared with the primitive `string` (not the boxed `String` wrapper) so
     * the result can be used wherever a plain string is expected.
     */
    abstract toString(): string;
}
/**
* Leaf node holding a run of text in the DOM tree.
* @param {string} value raw text; the implementation stores it unchanged as rawText
*/
export declare class TextNode extends Node {
constructor(value: string);
/**
* Node type tag; the implementation sets it to NodeType.TEXT_NODE.
* @type {Number}
*/
nodeType: NodeType;
/**
* Get the entity-decoded text of this node (rawText run through he's decode).
* @return {string} text content
*/
get text(): string;
/**
* Detect if the node contains only white space, i.e. rawText consists solely
* of whitespace characters and literal `&nbsp;` entities (or is empty).
* @return {boolean} true when the node is whitespace-only
*/
get isWhitespace(): boolean;
/** Returns the same decoded string as the `text` getter. */
toString(): string;
}
/**
* Node holding the body of an HTML comment.
* The constructor stores `value` unchanged as rawText.
*/
export declare class CommentNode extends Node {
constructor(value: string);
/**
* Node type tag; the implementation sets it to NodeType.COMMENT_NODE.
* @type {Number}
*/
nodeType: NodeType;
/**
* Get the entity-decoded comment body (rawText run through he's decode).
* @return {string} text content
*/
get text(): string;
/** Serialises the comment as `<!--` + rawText + `-->`. */
toString(): string;
}
/**
* The two attributes handed to the HTMLElement constructor separately from the
* raw attribute string: `id` is copied to the element's id and `class` is split
* on whitespace into classNames.
*/
export interface KeyAttributes {
id?: string;
class?: string;
}
/** Attribute name → entity-decoded value, as returned by HTMLElement#attributes. */
export interface Attributes {
[key: string]: string;
}
/** Attribute name → value exactly as written in the source (still escaped), as returned by HTMLElement#rawAttributes. */
export interface RawAttributes {
[key: string]: string;
}
/**
* HTMLElement, which contains a set of children.
*
* Note: this is a minimalist implementation. A parentNode back-reference is
* kept, but no sibling links (nextSibling, previousSibling etc.) are provided.
* @class HTMLElement
* @extends {Node}
*/
export declare class HTMLElement extends Node {
tagName: string;
private rawAttrs;
parentNode: Node;
private _attrs;
private _rawAttrs;
id: string;
classNames: string[];
/**
* Node type tag; the implementation sets it to NodeType.ELEMENT_NODE.
*/
nodeType: NodeType;
/**
* Creates an instance of HTMLElement.
* @param tagName tag name of the element
* @param keyAttrs id and class attribute
* @param [rawAttrs] attributes in string
* @param [parentNode] parent of the new element
*
* @memberof HTMLElement
*/
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs?: string, parentNode?: Node);
/**
* Remove every occurrence of a child node from the childNodes array
* @param {Node} node node to remove
*/
removeChild(node: Node): void;
/**
* Exchanges given child with new child
* @param {Node} oldNode node to exchange
* @param {Node} newNode new node
*/
exchangeChild(oldNode: Node, newNode: Node): void;
/**
* Get escaped (as-is) text value of current node and its children.
* @return {string} text content
*/
get rawText(): string;
/**
* Get unescaped text value of current node and its children.
* @return {string} text content
*/
get text(): string;
/**
* Get structured Text (with '\n' etc.)
* @return {string} structured text
*/
get structuredText(): string;
/** Serialises the element; `outerHTML` returns the same string. */
toString(): string;
/** Concatenation of `toString()` of every child node. */
get innerHTML(): string;
/**
* Replace all children: a single node, an array of nodes, or an HTML string
* that is parsed first.
*/
set_content(content: string | Node | Node[]): void;
/** Same value as `toString()`. */
get outerHTML(): string;
/**
* Trim element from right (in block) after seeing pattern in a TextNode.
* @param {RegExp} pattern pattern to find
* @return {HTMLElement} reference to current node
*/
trimRight(pattern: RegExp): this;
/**
* Get DOM structure
* @return {string} structure
*/
get structure(): string;
/**
* Remove whitespaces in this sub tree.
* @return {HTMLElement} pointer to this
*/
removeWhitespace(): this;
/**
* Query CSS selector to find matching nodes.
* @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance
* @return {HTMLElement[]} matching elements
*/
querySelectorAll(selector: string | Matcher): HTMLElement[];
/**
* Query CSS Selector to find matching node.
* @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance
* @return {HTMLElement} matching node
*/
querySelector(selector: string | Matcher): HTMLElement;
/**
* Append a child node to childNodes
* @param {Node} node node to append
* @return {Node} node appended
*/
appendChild<T extends Node = Node>(node: T): T;
/**
* Get first child node
* @return {Node} first child node
*/
get firstChild(): Node;
/**
* Get last child node
* @return {Node} last child node
*/
get lastChild(): Node;
/**
* Get attributes
* @return {Object} parsed and unescaped attributes
*/
get attributes(): Attributes;
/**
* Get escaped (as-is) attributes
* @return {Object} parsed attributes
*/
get rawAttributes(): RawAttributes;
/**
* Set an attribute value to the HTMLElement
* @param {string} key The attribute name
* @param {string} value The value to set, or null / undefined to remove an attribute
*/
setAttribute(key: string, value: string): void;
/**
* Replace all the attributes of the HTMLElement by the provided attributes
* @param {Attributes} attributes the new attribute set
*/
setAttributes(attributes: Attributes): void;
}
/**
* Matcher class to make CSS match.
* Tracks how many segments of a space-separated selector have matched so far.
*
* @class Matcher
*/
export declare class Matcher {
private matchers;
private nextMatch;
/**
* Creates an instance of Matcher.
* @param {string} selector simplified CSS selector; segments are separated by spaces
*
* @memberof Matcher
*/
constructor(selector: string);
/**
* Trying to advance match pointer
* @param {Node} el element to make the match
* @return {boolean} true when pointer advanced.
*/
advance(el: Node): boolean;
/**
* Rewind the match pointer by one segment
*/
rewind(): void;
/**
* Trying to determine if match made.
* @return {boolean} true when every segment has matched
*/
get matched(): boolean;
/**
* Reset the match pointer to the first segment.
*/
reset(): void;
/**
* flush cache to free memory
*/
flushCache(): void;
}
/**
* Parses HTML and returns a root element

@@ -236,10 +19,3 @@ * Parse a chuck of HTML source.

*/
export declare function parse(data: string, options?: {
lowerCaseTagName?: boolean;
noFix?: boolean;
script?: boolean;
style?: boolean;
pre?: boolean;
comment?: boolean;
}): (TextNode & {
export declare function parse(data: string, options?: Options): (TextNode & {
valid: boolean;

@@ -249,1 +25,2 @@ }) | (HTMLElement & {

});
export default parse;
"use strict";
// Compiler-emitted ES5 helper implementing `class d extends b`.
// An existing `this.__extends` is reused when one is already present.
var __extends = (this && this.__extends) || (function () {
var extendStatics = function (d, b) {
// The first call selects a way to inherit static members and overwrites
// extendStatics with it, so the selection happens only once. Preference order:
// Object.setPrototypeOf, then a `__proto__` assignment when the engine honours
// it, then copying b's own enumerable properties onto d.
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
extendStatics(d, b);
// `__` is a throwaway constructor: its instance becomes d.prototype, so
// d.prototype inherits from b.prototype without running b's constructor.
function __() { this.constructor = d; }
// A null base gives a prototype-less object; otherwise use the `__` instance.
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var he_1 = require("he");
// Two-way lookup table (name -> number and number -> name) for the node kinds,
// shared with any NodeType object already present on `exports`.
var NodeType = exports.NodeType || (exports.NodeType = {});
[["ELEMENT_NODE", 1], ["TEXT_NODE", 3], ["COMMENT_NODE", 8]].forEach(function (entry) {
    var name = entry[0];
    var code = entry[1];
    NodeType[name] = code;
    NodeType[code] = name;
});
/**
* Node Class as base class for TextNode and HTMLElement.
*/
var Node = /** @class */ (function () {
function Node() {
this.childNodes = [];
}
return Node;
}());
exports.Node = Node;
/**
* TextNode to contain a text element in DOM tree.
* @param {string} value [description]
*/
var TextNode = /** @class */ (function (_super) {
__extends(TextNode, _super);
function TextNode(value) {
var _this = _super.call(this) || this;
/**
* Node Type declaration.
* @type {Number}
*/
_this.nodeType = NodeType.TEXT_NODE;
_this.rawText = value;
return _this;
}
Object.defineProperty(TextNode.prototype, "text", {
/**
* Get unescaped text value of current node and its children.
* @return {string} text content
*/
get: function () {
return he_1.decode(this.rawText);
},
enumerable: true,
configurable: true
});
Object.defineProperty(TextNode.prototype, "isWhitespace", {
/**
* Detect if the node contains only white space.
* @return {bool}
*/
get: function () {
return /^(\s|&nbsp;)*$/.test(this.rawText);
},
enumerable: true,
configurable: true
});
TextNode.prototype.toString = function () {
return this.text;
};
return TextNode;
}(Node));
exports.TextNode = TextNode;
var CommentNode = /** @class */ (function (_super) {
__extends(CommentNode, _super);
function CommentNode(value) {
var _this = _super.call(this) || this;
/**
* Node Type declaration.
* @type {Number}
*/
_this.nodeType = NodeType.COMMENT_NODE;
_this.rawText = value;
return _this;
}
Object.defineProperty(CommentNode.prototype, "text", {
/**
* Get unescaped text value of current node and its children.
* @return {string} text content
*/
get: function () {
return he_1.decode(this.rawText);
},
enumerable: true,
configurable: true
});
CommentNode.prototype.toString = function () {
return "<!--" + this.rawText + "-->";
};
return CommentNode;
}(Node));
exports.CommentNode = CommentNode;
var kBlockElements = {
div: true,
p: true,
// ul: true,
// ol: true,
li: true,
// table: true,
// tr: true,
td: true,
section: true,
br: true
};
function arr_back(arr) {
return arr[arr.length - 1];
}
/**
* HTMLElement, which contains a set of children.
*
* Note: this is a minimalist implementation. Each element keeps a
* parentNode back-reference, but no sibling links (nextSibling,
* previousSibling etc.) are provided.
* @class HTMLElement
* @extends {Node}
*/
var HTMLElement = /** @class */ (function (_super) {
__extends(HTMLElement, _super);
/**
* Creates an instance of HTMLElement.
* @param keyAttrs id and class attribute
* @param [rawAttrs] attributes in string
*
* @memberof HTMLElement
*/
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode) {
if (rawAttrs === void 0) { rawAttrs = ''; }
if (parentNode === void 0) { parentNode = null; }
var _this = _super.call(this) || this;
_this.tagName = tagName;
_this.rawAttrs = rawAttrs;
_this.parentNode = parentNode;
_this.classNames = [];
/**
* Node Type declaration.
*/
_this.nodeType = NodeType.ELEMENT_NODE;
_this.rawAttrs = rawAttrs || '';
_this.parentNode = parentNode || null;
_this.childNodes = [];
if (keyAttrs.id) {
_this.id = keyAttrs.id;
}
if (keyAttrs.class) {
_this.classNames = keyAttrs.class.split(/\s+/);
}
return _this;
}
/**
* Remove Child element from childNodes array
* @param {HTMLElement} node node to remove
*/
HTMLElement.prototype.removeChild = function (node) {
this.childNodes = this.childNodes.filter(function (child) {
return (child !== node);
});
};
/**
* Exchanges given child with new child
* @param {HTMLElement} oldNode node to exchange
* @param {HTMLElement} newNode new node
*/
HTMLElement.prototype.exchangeChild = function (oldNode, newNode) {
var idx = -1;
for (var i = 0; i < this.childNodes.length; i++) {
if (this.childNodes[i] === oldNode) {
idx = i;
break;
}
}
this.childNodes[idx] = newNode;
};
Object.defineProperty(HTMLElement.prototype, "rawText", {
/**
* Get escpaed (as-it) text value of current node and its children.
* @return {string} text content
*/
get: function () {
var res = '';
for (var i = 0; i < this.childNodes.length; i++)
res += this.childNodes[i].rawText;
return res;
},
enumerable: true,
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "text", {
/**
* Get unescaped text value of current node and its children.
* @return {string} text content
*/
get: function () {
return he_1.decode(this.rawText);
},
enumerable: true,
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "structuredText", {
/**
* Get structured text: the decoded text of the subtree, with one output line
* per block. A new block starts before and after every element whose tagName
* is a key of kBlockElements.
* @return {string} structured text, blocks joined with '\n', trailing whitespace removed
*/
get: function () {
// currentBlock collects the text pieces of the block being filled; blocks
// holds every block created so far, in document order.
var currentBlock = [];
var blocks = [currentBlock];
function dfs(node) {
if (node.nodeType === NodeType.ELEMENT_NODE) {
if (kBlockElements[node.tagName]) {
// Open a fresh block only when the current one already has content, so
// nested or adjacent block elements do not produce empty lines.
if (currentBlock.length > 0) {
blocks.push(currentBlock = []);
}
node.childNodes.forEach(dfs);
if (currentBlock.length > 0) {
blocks.push(currentBlock = []);
}
}
else {
// Inline element: its text flows into the surrounding block.
node.childNodes.forEach(dfs);
}
}
else if (node.nodeType === NodeType.TEXT_NODE) {
if (node.isWhitespace) {
// Whitespace node, postponed output: remember it as a flag on the block
// array and emit a single space in front of the next real text.
currentBlock.prependWhitespace = true;
}
else {
var text = node.text;
if (currentBlock.prependWhitespace) {
text = ' ' + text;
currentBlock.prependWhitespace = false;
}
currentBlock.push(text);
}
}
// Any other node type (e.g. comments) contributes nothing.
}
dfs(this);
return blocks
.map(function (block) {
// Normalize each line's whitespace: trim, then collapse runs of 2+ to one space
return block.join('').trim().replace(/\s{2,}/g, ' ');
})
.join('\n').replace(/\s+$/, ''); // trimRight;
},
enumerable: true,
configurable: true
});
HTMLElement.prototype.toString = function () {
var tag = this.tagName;
if (tag) {
var is_un_closed = /^meta$/i.test(tag);
var is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag);
var attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
if (is_un_closed) {
return "<" + tag + attrs + ">";
}
else if (is_self_closed) {
return "<" + tag + attrs + " />";
}
else {
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
}
}
else {
return this.innerHTML;
}
};
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
get: function () {
return this.childNodes.map(function (child) {
return child.toString();
}).join('');
},
enumerable: true,
configurable: true
});
HTMLElement.prototype.set_content = function (content) {
if (content instanceof Node) {
content = [content];
}
else if (typeof content == 'string') {
var r = parse(content);
content = r.childNodes.length ? r.childNodes : [new TextNode(content)];
}
this.childNodes = content;
};
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
get: function () {
return this.toString();
},
enumerable: true,
configurable: true
});
/**
* Trim element from right (in block) after seeing pattern in a TextNode.
* @param {RegExp} pattern pattern to find
* @return {HTMLElement} reference to current node
*/
HTMLElement.prototype.trimRight = function (pattern) {
for (var i = 0; i < this.childNodes.length; i++) {
var childNode = this.childNodes[i];
if (childNode.nodeType === NodeType.ELEMENT_NODE) {
childNode.trimRight(pattern);
}
else {
var index = childNode.rawText.search(pattern);
if (index > -1) {
childNode.rawText = childNode.rawText.substr(0, index);
// trim all following nodes.
this.childNodes.length = i + 1;
}
}
}
return this;
};
Object.defineProperty(HTMLElement.prototype, "structure", {
/**
* Get DOM structure
* @return {string} strucutre
*/
get: function () {
var res = [];
var indention = 0;
function write(str) {
res.push(' '.repeat(indention) + str);
}
function dfs(node) {
var idStr = node.id ? ('#' + node.id) : '';
var classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : '';
write(node.tagName + idStr + classStr);
indention++;
for (var i = 0; i < node.childNodes.length; i++) {
var childNode = node.childNodes[i];
if (childNode.nodeType === NodeType.ELEMENT_NODE) {
dfs(childNode);
}
else if (childNode.nodeType === NodeType.TEXT_NODE) {
if (!childNode.isWhitespace)
write('#text');
}
}
indention--;
}
dfs(this);
return res.join('\n');
},
enumerable: true,
configurable: true
});
/**
* Remove whitespaces in this sub tree.
* @return {HTMLElement} pointer to this
*/
HTMLElement.prototype.removeWhitespace = function () {
var o = 0;
for (var i = 0; i < this.childNodes.length; i++) {
var node = this.childNodes[i];
if (node.nodeType === NodeType.TEXT_NODE) {
if (node.isWhitespace)
continue;
node.rawText = node.rawText.trim();
}
else if (node.nodeType === NodeType.ELEMENT_NODE) {
node.removeWhitespace();
}
this.childNodes[o++] = node;
}
this.childNodes.length = o;
return this;
};
/**
* Query CSS selector to find matching nodes among the descendants of this
* element (the element itself is never tested).
* A string containing ',' is split into alternatives that are queried
* separately; their results are merged with duplicates removed.
* @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance (which is reset first)
* @return {HTMLElement[]} matching elements
*/
HTMLElement.prototype.querySelectorAll = function (selector) {
var _this = this;
var matcher;
if (selector instanceof Matcher) {
matcher = selector;
matcher.reset();
}
else {
if (selector.includes(',')) {
var selectors = selector.split(',');
// Run each alternative on its own and collect everything in one Set so an
// element matched by several alternatives is returned only once.
return Array.from(selectors.reduce(function (pre, cur) {
var result = _this.querySelectorAll(cur.trim());
return result.reduce(function (p, c) {
return p.add(c);
}, pre);
}, new Set()));
}
matcher = new Matcher(selector);
}
var res = [];
// Explicit depth-first traversal. Each stack entry is
// [node, index of the next child to visit, whether this node advanced the matcher].
var stack = [];
for (var i = 0; i < this.childNodes.length; i++) {
stack.push([this.childNodes[i], 0, false]);
while (stack.length) {
var state = arr_back(stack);
var el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== NodeType.ELEMENT_NODE) {
stack.pop();
continue;
}
// Intentional assignment: remember whether this element advanced the
// matcher so the advance can be undone when the element is left.
if (state[2] = matcher.advance(el)) {
if (matcher.matched) {
res.push(el);
// no need to go further.
// NOTE(review): descendants of a matched element are therefore never tested.
matcher.rewind();
stack.pop();
continue;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
// All children visited: undo this element's advance, then leave it.
if (state[2])
matcher.rewind();
stack.pop();
}
}
}
return res;
};
/**
* Query CSS Selector to find the first matching descendant in document order
* (the element itself is never tested).
* NOTE(review): unlike querySelectorAll, a string selector containing ',' is
* not split into alternatives here.
* @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance (which is reset first)
* @return {HTMLElement} matching node, or null when nothing matches
*/
HTMLElement.prototype.querySelector = function (selector) {
var matcher;
if (selector instanceof Matcher) {
matcher = selector;
matcher.reset();
}
else {
matcher = new Matcher(selector);
}
// Explicit depth-first traversal. Each stack entry is
// [node, index of the next child to visit, whether this node advanced the matcher].
var stack = [];
for (var i = 0; i < this.childNodes.length; i++) {
stack.push([this.childNodes[i], 0, false]);
while (stack.length) {
var state = arr_back(stack);
var el = state[0];
if (state[1] === 0) {
// Seen for first time.
if (el.nodeType !== NodeType.ELEMENT_NODE) {
stack.pop();
continue;
}
// Intentional assignment: remember whether this element advanced the
// matcher so the advance can be undone when the element is left.
if (state[2] = matcher.advance(el)) {
if (matcher.matched) {
return el;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
// All children visited: undo this element's advance, then leave it.
if (state[2])
matcher.rewind();
stack.pop();
}
}
}
return null;
};
/**
* Append a child node to childNodes
* @param {Node} node node to append
* @return {Node} node appended
*/
HTMLElement.prototype.appendChild = function (node) {
// node.parentNode = this;
this.childNodes.push(node);
if (node instanceof HTMLElement) {
node.parentNode = this;
}
return node;
};
Object.defineProperty(HTMLElement.prototype, "firstChild", {
/**
* Get first child node
* @return {Node} first child node
*/
get: function () {
return this.childNodes[0];
},
enumerable: true,
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "lastChild", {
/**
* Get last child node
* @return {Node} last child node
*/
get: function () {
return arr_back(this.childNodes);
},
enumerable: true,
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "attributes", {
/**
* Get attributes
* @return {Object} parsed and unescaped attributes
*/
get: function () {
if (this._attrs)
return this._attrs;
this._attrs = {};
var attrs = this.rawAttributes;
for (var key in attrs) {
this._attrs[key] = he_1.decode(attrs[key]);
}
return this._attrs;
},
enumerable: true,
configurable: true
});
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
/**
* Get escaped (as-it) attributes
* @return {Object} parsed attributes
*/
get: function () {
if (this._rawAttrs)
return this._rawAttrs;
var attrs = {};
if (this.rawAttrs) {
var re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
var match = void 0;
while (match = re.exec(this.rawAttrs)) {
attrs[match[1]] = match[2] || match[3] || match[4] || "";
}
}
this._rawAttrs = attrs;
return attrs;
},
enumerable: true,
configurable: true
});
/**
* Set an attribute value to the HTMLElement
* @param {string} key The attribute name
* @param {string} value The value to set, or null / undefined to remove an attribute
*/
HTMLElement.prototype.setAttribute = function (key, value) {
//Update the attributes map
var attrs = this.attributes;
if (value === undefined || value === null)
delete attrs[key];
else
attrs[key] = value + '';
//Update the raw attributes
if (this._rawAttrs) {
if (value === undefined || value === null)
delete this._rawAttrs[key];
else
this._rawAttrs[key] = he_1.encode(value + '');
}
//Update rawString
this.rawAttrs = Object.keys(attrs).map(function (attr) { return attr + '=' + he_1.encode(attrs[attr]); }).join(' ');
};
/**
* Replace all the attributes of the HTMLElement by the provided attributes
* @param {Attributes} attributes the new attribute set
*/
HTMLElement.prototype.setAttributes = function (attributes) {
var _this = this;
//Update the attributes map
if (this.attributes) {
Object.keys(this.attributes).forEach(function (key) { return delete _this.attributes[key]; });
Object.keys(attributes).forEach(function (key) { return _this.attributes[key] = attributes[key] + ''; });
}
//Update the raw attributes map
if (this.rawAttributes) {
Object.keys(this.rawAttributes).forEach(function (key) { return delete _this.rawAttributes[key]; });
Object.keys(attributes).forEach(function (key) { return _this.rawAttributes[key] = he_1.encode(attributes[key] + ''); });
}
//Update rawString
this.rawAttrs = Object.keys(attributes).map(function (attr) { return attr + '=' + he_1.encode(attributes[attr] + ''); }).join(' ');
};
return HTMLElement;
}(Node));
exports.HTMLElement = HTMLElement;
/**
* Cache of compiled selector segments, keyed by the segment text.
* Each entry is the descriptor object built by the Matcher constructor
* ({ func, tagName, classes, attr_key, value }); Matcher#flushCache replaces
* the whole cache with an empty object.
* @type {Object}
*/
var pMatchFunctionCache = {};
/**
* Function cache
*/
var functionCache = {
"f145": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.id != tagName.substr(1))
return false;
for (var cls = classes, i = 0; i < cls.length; i++)
if (el.classNames.indexOf(cls[i]) === -1)
return false;
return true;
},
"f45": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
for (var cls = classes, i = 0; i < cls.length; i++)
if (el.classNames.indexOf(cls[i]) === -1)
return false;
return true;
},
"f15": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.id != tagName.substr(1))
return false;
return true;
},
"f1": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.id != tagName.substr(1))
return false;
},
"f5": function (el, tagName, classes, attr_key, value) {
"use strict";
el = el || {};
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
return true;
},
"f245": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
var attrs = el.attributes;
for (var key in attrs) {
var val = attrs[key];
if (key == attr_key && val == value) {
return true;
}
}
return false;
// for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}}
// return true;
},
"f25": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
var attrs = el.attributes;
for (var key in attrs) {
var val = attrs[key];
if (key == attr_key && val == value) {
return true;
}
}
return false;
//return true;
},
"f2": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
var attrs = el.attributes;
for (var key in attrs) {
var val = attrs[key];
if (key == attr_key && val == value) {
return true;
}
}
return false;
},
"f345": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.tagName != tagName)
return false;
for (var cls = classes, i = 0; i < cls.length; i++)
if (el.classNames.indexOf(cls[i]) === -1)
return false;
return true;
},
"f35": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.tagName != tagName)
return false;
return true;
},
"f3": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.tagName != tagName)
return false;
}
};
/**
* Matcher class to make CSS match
*
* @class Matcher
*/
var Matcher = /** @class */ (function () {
/**
* Creates an instance of Matcher.
* @param {string} selector
*
* @memberof Matcher
*/
function Matcher(selector) {
this.nextMatch = 0;
functionCache["f5"] = functionCache["f5"];
this.matchers = selector.split(' ').map(function (matcher) {
if (pMatchFunctionCache[matcher])
return pMatchFunctionCache[matcher];
var parts = matcher.split('.');
var tagName = parts[0];
var classes = parts.slice(1).sort();
var source = '"use strict";';
var function_name = 'f';
var attr_key = "";
var value = "";
if (tagName && tagName != '*') {
var matcher_1;
if (tagName[0] == '#') {
source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;'; //1
function_name += '1';
}
else if (matcher_1 = tagName.match(/^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/)) {
attr_key = matcher_1[1];
var method = matcher_1[2];
if (method !== '=' && method !== '!=') {
throw new Error('Selector not supported, Expect [key${op}value].op must be =,!=');
}
if (method === '=') {
method = '==';
}
value = matcher_1[7] || matcher_1[8];
source += "let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == \"" + attr_key + "\" && val == \"" + value + "\"){return true;}} return false;"; //2
function_name += '2';
}
else {
source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;'; //3
function_name += '3';
}
}
if (classes.length > 0) {
source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;'; //4
function_name += '4';
}
source += 'return true;'; //5
function_name += '5';
var obj = {
func: functionCache[function_name],
tagName: tagName || "",
classes: classes || "",
attr_key: attr_key || "",
value: value || ""
};
source = source || "";
return pMatchFunctionCache[matcher] = obj;
});
}
/**
* Trying to advance match pointer
* @param {HTMLElement} el element to make the match
* @return {bool} true when pointer advanced.
*/
Matcher.prototype.advance = function (el) {
if (this.nextMatch < this.matchers.length &&
this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) {
this.nextMatch++;
return true;
}
return false;
};
/**
* Rewind the match pointer
*/
Matcher.prototype.rewind = function () {
this.nextMatch--;
};
Object.defineProperty(Matcher.prototype, "matched", {
/**
* Trying to determine if match made.
* @return {bool} true when the match is made
*/
get: function () {
return this.nextMatch == this.matchers.length;
},
enumerable: true,
configurable: true
});
/**
* Rest match pointer.
* @return {[type]} [description]
*/
Matcher.prototype.reset = function () {
this.nextMatch = 0;
};
/**
* flush cache to free memory
*/
Matcher.prototype.flushCache = function () {
pMatchFunctionCache = {};
};
return Matcher;
}());
exports.Matcher = Matcher;
var back_1 = __importDefault(require("./back"));
var comment_1 = __importDefault(require("./nodes/comment"));
var html_1 = require("./nodes/html");
exports.HTMLElement = html_1.default;
var html_2 = __importDefault(require("./nodes/html"));
var text_1 = __importDefault(require("./nodes/text"));
var node_1 = require("./nodes/node");
exports.Node = node_1.default;
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name

@@ -911,7 +64,7 @@ var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;

function parse(data, options) {
var root = new HTMLElement(null, {});
if (options === void 0) { options = {}; }
var root = new html_2.default(null, {});
var currentParent = root;
var stack = [root];
var lastTextPos = -1;
options = options || {};
var match;

@@ -923,7 +76,7 @@ while (match = kMarkupPattern.exec(data)) {

var text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
currentParent.appendChild(new TextNode(text));
currentParent.appendChild(new text_1.default(text));
}
}
lastTextPos = kMarkupPattern.lastIndex;
if (match[0][1] == '!') {
if (match[0][1] === '!') {
// this is a comment

@@ -933,3 +86,3 @@ if (options.comment) {

var text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
currentParent.appendChild(new CommentNode(text));
currentParent.appendChild(new comment_1.default(text));
}

@@ -946,9 +99,10 @@ continue;

}
if (!match[4] && kElementsClosedByOpening[currentParent.tagName]) {
if (kElementsClosedByOpening[currentParent.tagName][match[2]]) {
var tagName = currentParent.tagName;
if (!match[4] && kElementsClosedByOpening[tagName]) {
if (kElementsClosedByOpening[tagName][match[2]]) {
stack.pop();
currentParent = arr_back(stack);
currentParent = back_1.default(stack);
}
}
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
currentParent = currentParent.appendChild(new html_2.default(match[2], attrs, match[3]));
stack.push(currentParent);

@@ -961,3 +115,3 @@ if (kBlockTextElements[match[2]]) {

var text = void 0;
if (index == -1) {
if (index === -1) {
// there is no matching ending for the text element.

@@ -970,6 +124,6 @@ text = data.substr(kMarkupPattern.lastIndex);

if (text.length > 0) {
currentParent.appendChild(new TextNode(text));
currentParent.appendChild(new text_1.default(text));
}
}
if (index == -1) {
if (index === -1) {
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;

@@ -987,13 +141,14 @@ }

while (true) {
if (currentParent.tagName == match[2]) {
if (currentParent.tagName === match[2]) {
stack.pop();
currentParent = arr_back(stack);
currentParent = back_1.default(stack);
break;
}
else {
var tagName = currentParent.tagName;
// Trying to close current tag, and move on
if (kElementsClosedByClosing[currentParent.tagName]) {
if (kElementsClosedByClosing[currentParent.tagName][match[2]]) {
if (kElementsClosedByClosing[tagName]) {
if (kElementsClosedByClosing[tagName][match[2]]) {
stack.pop();
currentParent = arr_back(stack);
currentParent = back_1.default(stack);
continue;

@@ -1015,3 +170,3 @@ }

var last = stack.pop();
var oneBefore = arr_back(stack);
var oneBefore = back_1.default(stack);
if (last.parentNode && last.parentNode.parentNode) {

@@ -1042,3 +197,3 @@ if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {

response.childNodes.forEach(function (node) {
if (node instanceof HTMLElement) {
if (node instanceof html_2.default) {
node.parentNode = null;

@@ -1050,3 +205,3 @@ }

else {
var response = new TextNode(data);
var response = new text_1.default(data);
response.valid = valid;

@@ -1057,1 +212,2 @@ return response;

exports.parse = parse;
exports.default = parse;

@@ -1,14 +0,4 @@

// Compiler-emitted ES5 helper implementing `class d extends b`.
// An existing `this.__extends` is reused when one is already present.
var __extends = (this && this.__extends) || (function () {
var extendStatics = function (d, b) {
// The first call selects a way to inherit static members and overwrites
// extendStatics with it, so the selection happens only once. Preference order:
// Object.setPrototypeOf, then a `__proto__` assignment when the engine honours
// it, then copying b's own enumerable properties onto d.
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
extendStatics(d, b);
// `__` is a throwaway constructor: its instance becomes d.prototype, so
// d.prototype inherits from b.prototype without running b's constructor.
function __() { this.constructor = d; }
// A null base gives a prototype-less object; otherwise use the `__` instance.
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
(function (factory) {

@@ -20,3 +10,3 @@ if (typeof module === "object" && typeof module.exports === "object") {

else if (typeof define === "function" && define.amd) {
define(["require", "exports", "he"], factory);
define(["require", "exports", "./back", "./nodes/comment", "./nodes/html", "./nodes/html", "./nodes/text", "./nodes/node"], factory);
}

@@ -26,847 +16,10 @@ })(function (require, exports) {

Object.defineProperty(exports, "__esModule", { value: true });
var he_1 = require("he");
// Node kind codes, exposed on `exports.NodeType`. The table is two-way:
// NodeType.TEXT_NODE === 3 and NodeType[3] === "TEXT_NODE".
var NodeType = exports.NodeType || (exports.NodeType = {});
[
    ["ELEMENT_NODE", 1],
    ["TEXT_NODE", 3],
    ["COMMENT_NODE", 8]
].forEach(function (entry) {
    var label = entry[0];
    var code = entry[1];
    NodeType[label] = code;
    NodeType[code] = label;
});
/**
* Base class for the node kinds defined in this file (TextNode, CommentNode
* and HTMLElement all extend it).
*/
var Node = /** @class */ (function () {
function Node() {
// Every node starts out with an empty child list.
this.childNodes = [];
}
return Node;
}());
exports.Node = Node;
/**
 * Leaf node holding a run of text.
 * @param {string} value text exactly as it appeared in the source; kept as `rawText`
 */
var TextNode = /** @class */ (function (_super) {
    __extends(TextNode, _super);
    function TextNode(value) {
        var instance = _super.call(this) || this;
        // Mark the instance as a text node.
        instance.nodeType = NodeType.TEXT_NODE;
        instance.rawText = value;
        return instance;
    }
    Object.defineProperties(TextNode.prototype, {
        /**
         * `rawText` passed through `he_1.decode`.
         * @return {string} decoded text content
         */
        text: {
            get: function () {
                return he_1.decode(this.rawText);
            },
            enumerable: true,
            configurable: true
        },
        /**
         * True when `rawText` consists solely of whitespace characters and/or
         * literal `&nbsp;` entities (an empty string also qualifies).
         * @return {boolean}
         */
        isWhitespace: {
            get: function () {
                return /^(\s|&nbsp;)*$/.test(this.rawText);
            },
            enumerable: true,
            configurable: true
        }
    });
    /**
     * String form of the node: the decoded text.
     * @return {string}
     */
    TextNode.prototype.toString = function () {
        return this.text;
    };
    return TextNode;
}(Node));
exports.TextNode = TextNode;
/**
 * Leaf node holding the body of an HTML comment.
 * @param {string} value comment body, kept verbatim as `rawText`
 */
var CommentNode = /** @class */ (function (_super) {
    __extends(CommentNode, _super);
    function CommentNode(value) {
        var instance = _super.call(this) || this;
        // Mark the instance as a comment node.
        instance.nodeType = NodeType.COMMENT_NODE;
        instance.rawText = value;
        return instance;
    }
    var textAccessor = {
        /**
         * `rawText` passed through `he_1.decode`.
         * @return {string} decoded comment text
         */
        get: function () {
            return he_1.decode(this.rawText);
        },
        enumerable: true,
        configurable: true
    };
    Object.defineProperty(CommentNode.prototype, "text", textAccessor);
    /**
     * Serialise back to comment markup using the raw (undecoded) body.
     * @return {string}
     */
    CommentNode.prototype.toString = function () {
        var body = this.rawText;
        return "<!--" + body + "-->";
    };
    return CommentNode;
}(Node));
exports.CommentNode = CommentNode;
// Tag names that HTMLElement#structuredText treats as block-level: text on
// either side of such an element is placed in separate output blocks.
// The commented-out entries are deliberately not treated as blocks.
var kBlockElements = {
div: true,
p: true,
// ul: true,
// ol: true,
li: true,
// table: true,
// tr: true,
td: true,
section: true,
br: true
};
/**
 * Peek at the last element of an array without modifying it.
 * @param {Array} arr array to inspect
 * @return {*} the final element, or undefined when the array is empty
 */
function arr_back(arr) {
    var lastIndex = arr.length - 1;
    return arr[lastIndex];
}
/**
* HTMLElement, which contains a set of children.
*
* Note: this is a minimalist implementation, no complete tree
* structure provided (no parentNode, nextSibling,
* previousSibling etc).
* @class HTMLElement
* @extends {Node}
*/
var HTMLElement = /** @class */ (function (_super) {
__extends(HTMLElement, _super);
/**
 * Creates an instance of HTMLElement.
 * @param tagName tag name stored on the element
 * @param keyAttrs object whose optional `id` and `class` members seed `id` and `classNames`
 * @param [rawAttrs] attribute source text; any falsy value becomes ''
 * @param [parentNode] owning element; any falsy value becomes null
 *
 * @memberof HTMLElement
 */
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode) {
    var element = _super.call(this) || this;
    element.tagName = tagName;
    // Omitted and falsy arguments both collapse to the defaults.
    element.rawAttrs = rawAttrs ? rawAttrs : '';
    element.parentNode = parentNode ? parentNode : null;
    element.classNames = [];
    /**
     * Node Type declaration.
     */
    element.nodeType = NodeType.ELEMENT_NODE;
    element.childNodes = [];
    if (keyAttrs.id) {
        element.id = keyAttrs.id;
    }
    if (keyAttrs.class) {
        // The class attribute is a whitespace-separated list.
        element.classNames = keyAttrs.class.split(/\s+/);
    }
    return element;
}
/**
 * Drop every occurrence of `node` from this element's child list.
 * The child list is replaced by a new array; nothing is returned.
 * @param {HTMLElement} node node to remove
 */
HTMLElement.prototype.removeChild = function (node) {
    var isDifferentNode = function (candidate) {
        return candidate !== node;
    };
    this.childNodes = this.childNodes.filter(isDifferentNode);
};
/**
 * Replace the first occurrence of `oldNode` in the child list with `newNode`.
 * When `oldNode` is not a child of this element the call is a no-op
 * (previously a stray "-1" property was written onto the child array).
 * @param {HTMLElement} oldNode node to exchange
 * @param {HTMLElement} newNode new node
 */
HTMLElement.prototype.exchangeChild = function (oldNode, newNode) {
    var idx = this.childNodes.indexOf(oldNode);
    if (idx === -1) {
        // Nothing to exchange; leave the child list untouched.
        return;
    }
    this.childNodes[idx] = newNode;
};
Object.defineProperty(HTMLElement.prototype, "rawText", {
    /**
     * Escaped (as-is) text of this element: the `rawText` of every child,
     * concatenated in order.
     * @return {string} text content
     */
    get: function () {
        var children = this.childNodes;
        var combined = '';
        for (var idx = 0; idx < children.length; idx += 1) {
            combined += children[idx].rawText;
        }
        return combined;
    },
    enumerable: true,
    configurable: true
});
Object.defineProperty(HTMLElement.prototype, "text", {
    /**
     * Text of this element and its children: `rawText` passed through
     * `he_1.decode`.
     * @return {string} text content
     */
    get: function () {
        var escaped = this.rawText;
        return he_1.decode(escaped);
    },
    enumerable: true,
    configurable: true
});
Object.defineProperty(HTMLElement.prototype, "structuredText", {
    /**
     * Text of the subtree with one line per block: elements listed in
     * kBlockElements start a new block before and after their content, and
     * blocks are joined with '\n'.
     * @return {string} structured text
     */
    get: function () {
        var current = [];
        var allBlocks = [current];
        // Open a fresh block, unless the current one is still empty.
        function startBlock() {
            if (current.length > 0) {
                current = [];
                allBlocks.push(current);
            }
        }
        function visit(node) {
            if (node.nodeType === NodeType.ELEMENT_NODE) {
                var isBlock = kBlockElements[node.tagName];
                if (isBlock) {
                    startBlock();
                }
                node.childNodes.forEach(visit);
                if (isBlock) {
                    startBlock();
                }
                return;
            }
            if (node.nodeType !== NodeType.TEXT_NODE) {
                return;
            }
            if (node.isWhitespace) {
                // Whitespace-only node: remember it and emit a single space
                // in front of the next real text instead.
                current.prependWhitespace = true;
                return;
            }
            var piece = node.text;
            if (current.prependWhitespace) {
                piece = ' ' + piece;
                current.prependWhitespace = false;
            }
            current.push(piece);
        }
        visit(this);
        var lines = allBlocks.map(function (block) {
            // Trim each line and collapse whitespace runs to one space.
            return block.join('').trim().replace(/\s{2,}/g, ' ');
        });
        // Strip trailing whitespace from the final result.
        return lines.join('\n').replace(/\s+$/, '');
    },
    enumerable: true,
    configurable: true
});
/**
 * Serialise this element to markup.
 * An element without a tag name yields only its innerHTML. `meta` is written
 * as an open tag with no closing tag; img, br, hr, area, base, input, doctype
 * and link are written self-closed; everything else gets a matching end tag.
 * @return {string}
 */
HTMLElement.prototype.toString = function () {
    var tag = this.tagName;
    if (!tag) {
        return this.innerHTML;
    }
    var attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
    if (/^meta$/i.test(tag)) {
        return "<" + tag + attrs + ">";
    }
    if (/^(img|br|hr|area|base|input|doctype|link)$/i.test(tag)) {
        return "<" + tag + attrs + " />";
    }
    return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
};
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
    /**
     * Markup of the children: each child's toString() output, concatenated.
     * @return {string}
     */
    get: function () {
        var rendered = this.childNodes.map(function (child) {
            return child.toString();
        });
        return rendered.join('');
    },
    enumerable: true,
    configurable: true
});
/**
 * Replace this element's children.
 * A single Node is wrapped in an array. A string is run through parse(); if
 * that produces no child nodes the string becomes one TextNode. Any other
 * value (such as an array of nodes) is stored as given.
 * @param {string|Node|Node[]} content new content
 */
HTMLElement.prototype.set_content = function (content) {
    var replacement = content;
    if (content instanceof Node) {
        replacement = [content];
    }
    else if (typeof content == 'string') {
        var parsed = parse(content);
        replacement = parsed.childNodes.length
            ? parsed.childNodes
            : [new TextNode(content)];
    }
    this.childNodes = replacement;
};
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
    /**
     * Markup of this element including its own tag; same value as toString().
     * @return {string}
     */
    get: function () {
        var markup = this.toString();
        return markup;
    },
    enumerable: true,
    configurable: true
});
/**
 * Cut the subtree off at the first place `pattern` is found in a
 * non-element child's raw text: that text is truncated at the match and all
 * later siblings in the same child list are dropped. Element children are
 * processed recursively.
 * @param {RegExp} pattern pattern to find
 * @return {HTMLElement} reference to current node
 */
HTMLElement.prototype.trimRight = function (pattern) {
    var children = this.childNodes;
    for (var pos = 0; pos < children.length; pos++) {
        var child = children[pos];
        if (child.nodeType === NodeType.ELEMENT_NODE) {
            child.trimRight(pattern);
            continue;
        }
        var cutAt = child.rawText.search(pattern);
        if (cutAt > -1) {
            child.rawText = child.rawText.slice(0, cutAt);
            // Discard every sibling after this one; this also ends the loop.
            children.length = pos + 1;
        }
    }
    return this;
};
Object.defineProperty(HTMLElement.prototype, "structure", {
    /**
     * Outline of the subtree, one line per element in the form
     * `tag#id.class1.class2`, plus a `#text` line for each text node that is
     * not whitespace-only. Lines are indented by nesting depth.
     * @return {string} structure
     */
    get: function () {
        var lines = [];
        function emit(depth, label) {
            lines.push(' '.repeat(depth) + label);
        }
        function walk(node, depth) {
            var idPart = node.id ? ('#' + node.id) : '';
            var classPart = node.classNames.length ? ('.' + node.classNames.join('.')) : '';
            emit(depth, node.tagName + idPart + classPart);
            var childDepth = depth + 1;
            for (var k = 0; k < node.childNodes.length; k++) {
                var child = node.childNodes[k];
                if (child.nodeType === NodeType.ELEMENT_NODE) {
                    walk(child, childDepth);
                }
                else if (child.nodeType === NodeType.TEXT_NODE && !child.isWhitespace) {
                    emit(childDepth, '#text');
                }
            }
        }
        walk(this, 0);
        return lines.join('\n');
    },
    enumerable: true,
    configurable: true
});
/**
 * Strip whitespace from this subtree in place: whitespace-only text nodes
 * are removed, remaining text nodes have their raw text trimmed, and element
 * children are processed recursively.
 * @return {HTMLElement} pointer to this
 */
HTMLElement.prototype.removeWhitespace = function () {
    var children = this.childNodes;
    var kept = 0;
    for (var idx = 0; idx < children.length; idx++) {
        var child = children[idx];
        var kind = child.nodeType;
        if (kind === NodeType.TEXT_NODE) {
            if (child.isWhitespace) {
                // Dropped: not copied into the compacted prefix.
                continue;
            }
            child.rawText = child.rawText.trim();
        }
        else if (kind === NodeType.ELEMENT_NODE) {
            child.removeWhitespace();
        }
        // Compact surviving nodes towards the front of the same array.
        children[kept] = child;
        kept += 1;
    }
    children.length = kept;
    return this;
};
/**
* Collect the descendants of this element that match a selector.
*
* A string containing ',' is split on commas; each alternative is queried
* separately (after trimming) and the results are merged through a Set, so
* each element appears once, ordered by alternative first and traversal
* order second. A Matcher instance is reset and then used directly.
*
* Once an element matches, its own descendants are not searched.
* @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance
* @return {HTMLElement[]} matching elements
*/
HTMLElement.prototype.querySelectorAll = function (selector) {
var _this = this;
var matcher;
if (selector instanceof Matcher) {
matcher = selector;
matcher.reset();
}
else {
if (selector.includes(',')) {
var selectors = selector.split(',');
return Array.from(selectors.reduce(function (pre, cur) {
var result = _this.querySelectorAll(cur.trim());
return result.reduce(function (p, c) {
return p.add(c);
}, pre);
}, new Set()));
}
matcher = new Matcher(selector);
}
var res = [];
// Explicit depth-first stack. Each entry is
// [node, index of the next child to visit, whether this node advanced the matcher].
var stack = [];
for (var i = 0; i < this.childNodes.length; i++) {
stack.push([this.childNodes[i], 0, false]);
while (stack.length) {
var state = arr_back(stack);
var el = state[0];
if (state[1] === 0) {
// No child visited yet: test the node itself. Non-element nodes are discarded.
if (el.nodeType !== NodeType.ELEMENT_NODE) {
stack.pop();
continue;
}
// Assignment is intentional: remember whether this node consumed a selector step.
if (state[2] = matcher.advance(el)) {
if (matcher.matched) {
res.push(el);
// Full match: record it, undo the step and skip this node's subtree.
matcher.rewind();
stack.pop();
continue;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
// Subtree exhausted: give back the selector step this node consumed, if any.
if (state[2])
matcher.rewind();
stack.pop();
}
}
}
return res;
};
/**
* Find the first descendant (in depth-first order) that matches a selector.
*
* A Matcher instance is reset and then used directly; a string is turned
* into a new Matcher. Unlike querySelectorAll, a ',' in the string gets no
* special handling here.
* @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance
* @return {HTMLElement} matching node, or null when nothing matches
*/
HTMLElement.prototype.querySelector = function (selector) {
var matcher;
if (selector instanceof Matcher) {
matcher = selector;
matcher.reset();
}
else {
matcher = new Matcher(selector);
}
// Explicit depth-first stack. Each entry is
// [node, index of the next child to visit, whether this node advanced the matcher].
var stack = [];
for (var i = 0; i < this.childNodes.length; i++) {
stack.push([this.childNodes[i], 0, false]);
while (stack.length) {
var state = arr_back(stack);
var el = state[0];
if (state[1] === 0) {
// No child visited yet: test the node itself. Non-element nodes are discarded.
if (el.nodeType !== NodeType.ELEMENT_NODE) {
stack.pop();
continue;
}
// Assignment is intentional: remember whether this node consumed a selector step.
if (state[2] = matcher.advance(el)) {
if (matcher.matched) {
return el;
}
}
}
if (state[1] < el.childNodes.length) {
stack.push([el.childNodes[state[1]++], 0, false]);
}
else {
// Subtree exhausted: give back the selector step this node consumed, if any.
if (state[2])
matcher.rewind();
stack.pop();
}
}
}
return null;
};
/**
 * Add a node to the end of the child list. When the node is an HTMLElement
 * its parentNode is pointed at this element; other node kinds are only
 * appended.
 * @param {Node} node node to append
 * @return {Node} node appended
 */
HTMLElement.prototype.appendChild = function (node) {
    var children = this.childNodes;
    children.push(node);
    var isElement = node instanceof HTMLElement;
    if (isElement) {
        node.parentNode = this;
    }
    return node;
};
Object.defineProperty(HTMLElement.prototype, "firstChild", {
    /**
     * First entry of the child list.
     * @return {Node} first child node, or undefined when there are no children
     */
    get: function () {
        var children = this.childNodes;
        return children[0];
    },
    enumerable: true,
    configurable: true
});
Object.defineProperty(HTMLElement.prototype, "lastChild", {
    /**
     * Last entry of the child list.
     * @return {Node} last child node, or undefined when there are no children
     */
    get: function () {
        var children = this.childNodes;
        return children[children.length - 1];
    },
    enumerable: true,
    configurable: true
});
Object.defineProperty(HTMLElement.prototype, "attributes", {
    /**
     * Attribute map with every value passed through `he_1.decode`.
     * Built from `rawAttributes` on first access and cached in `_attrs`;
     * later accesses return the same object.
     * @return {Object} parsed and unescaped attributes
     */
    get: function () {
        if (!this._attrs) {
            var decoded = {};
            // Publish the cache object before filling it.
            this._attrs = decoded;
            var raw = this.rawAttributes;
            for (var name in raw) {
                decoded[name] = he_1.decode(raw[name]);
            }
        }
        return this._attrs;
    },
    enumerable: true,
    configurable: true
});
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
    /**
     * Attribute map with values left escaped (as-is), parsed from the
     * `rawAttrs` string on first access and cached in `_rawAttrs`.
     * Values may be double-quoted, single-quoted or unquoted; an attribute
     * with no value maps to "".
     * @return {Object} parsed attributes
     */
    get: function () {
        if (this._rawAttrs) {
            return this._rawAttrs;
        }
        var parsed = {};
        if (this.rawAttrs) {
            // Groups: 1 = name, 2 = "value", 3 = 'value', 4 = bare value.
            var attrPattern = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
            var hit = attrPattern.exec(this.rawAttrs);
            while (hit) {
                parsed[hit[1]] = hit[2] || hit[3] || hit[4] || "";
                hit = attrPattern.exec(this.rawAttrs);
            }
        }
        this._rawAttrs = parsed;
        return parsed;
    },
    enumerable: true,
    configurable: true
});
/**
 * Set an attribute value on the HTMLElement, or remove the attribute.
 *
 * Updates the decoded attribute map, the cached raw map (when present) and
 * the `rawAttrs` string. The string is rebuilt as `name="encoded value"`
 * pairs; values are double-quoted so that empty values and values containing
 * whitespace serialise to well-formed markup.
 * @param {string} key The attribute name
 * @param {string} value The value to set, or null / undefined to remove an attribute
 */
HTMLElement.prototype.setAttribute = function (key, value) {
    var removing = value === undefined || value === null;
    // Update the decoded attributes map.
    var attrs = this.attributes;
    if (removing) {
        delete attrs[key];
    }
    else {
        attrs[key] = value + '';
    }
    // Keep the cached raw (escaped) map in step.
    if (this._rawAttrs) {
        if (removing) {
            delete this._rawAttrs[key];
        }
        else {
            this._rawAttrs[key] = he_1.encode(value + '');
        }
    }
    // Rebuild the raw string. NOTE(review): relies on he_1.encode escaping
    // double quotes, which he documents as default behaviour - confirm for
    // the pinned version.
    this.rawAttrs = Object.keys(attrs).map(function (name) {
        return name + '="' + he_1.encode(attrs[name]) + '"';
    }).join(' ');
};
/**
 * Replace all the attributes of the HTMLElement by the provided attributes.
 *
 * Both cached maps are emptied and refilled in place (decoded values as
 * strings, raw values encoded with he_1.encode), and the `rawAttrs` string
 * is rebuilt as `name="encoded value"` pairs. Values are double-quoted so
 * that empty values and values containing whitespace serialise to
 * well-formed markup.
 * @param {Attributes} attributes the new attribute set
 */
HTMLElement.prototype.setAttributes = function (attributes) {
    var names = Object.keys(attributes);
    // Update the decoded attributes map.
    var decoded = this.attributes;
    if (decoded) {
        Object.keys(decoded).forEach(function (key) { delete decoded[key]; });
        names.forEach(function (key) { decoded[key] = attributes[key] + ''; });
    }
    // Update the raw (escaped) attributes map.
    var raw = this.rawAttributes;
    if (raw) {
        Object.keys(raw).forEach(function (key) { delete raw[key]; });
        names.forEach(function (key) { raw[key] = he_1.encode(attributes[key] + ''); });
    }
    // Rebuild the raw string. NOTE(review): relies on he_1.encode escaping
    // double quotes, which he documents as default behaviour - confirm for
    // the pinned version.
    this.rawAttrs = names.map(function (name) {
        return name + '="' + he_1.encode(attributes[name] + '') + '"';
    }).join(' ');
};
return HTMLElement;
}(Node));
exports.HTMLElement = HTMLElement;
/**
* Cache of matcher descriptors, keyed by the text of a single selector token.
* Entries are added by the Matcher constructor; Matcher#flushCache replaces
* the whole object with an empty one.
* @type {Object}
*/
var pMatchFunctionCache = {};
/**
* Function cache
*/
var functionCache = {
"f145": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.id != tagName.substr(1))
return false;
for (var cls = classes, i = 0; i < cls.length; i++)
if (el.classNames.indexOf(cls[i]) === -1)
return false;
return true;
},
"f45": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
for (var cls = classes, i = 0; i < cls.length; i++)
if (el.classNames.indexOf(cls[i]) === -1)
return false;
return true;
},
"f15": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.id != tagName.substr(1))
return false;
return true;
},
"f1": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.id != tagName.substr(1))
return false;
},
"f5": function (el, tagName, classes, attr_key, value) {
"use strict";
el = el || {};
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
return true;
},
"f245": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
var attrs = el.attributes;
for (var key in attrs) {
var val = attrs[key];
if (key == attr_key && val == value) {
return true;
}
}
return false;
// for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}}
// return true;
},
"f25": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
var attrs = el.attributes;
for (var key in attrs) {
var val = attrs[key];
if (key == attr_key && val == value) {
return true;
}
}
return false;
//return true;
},
"f2": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
var attrs = el.attributes;
for (var key in attrs) {
var val = attrs[key];
if (key == attr_key && val == value) {
return true;
}
}
return false;
},
"f345": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.tagName != tagName)
return false;
for (var cls = classes, i = 0; i < cls.length; i++)
if (el.classNames.indexOf(cls[i]) === -1)
return false;
return true;
},
"f35": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.tagName != tagName)
return false;
return true;
},
"f3": function (el, tagName, classes, attr_key, value) {
"use strict";
tagName = tagName || "";
classes = classes || [];
attr_key = attr_key || "";
value = value || "";
if (el.tagName != tagName)
return false;
}
};
/**
 * Matcher class to make CSS match.
 *
 * A selector is split on whitespace into tokens (descendant steps). Each
 * token becomes a descriptor { func, tagName, classes, attr_key, value }
 * whose `func` is taken from functionCache. Descriptors are shared through
 * pMatchFunctionCache. `nextMatch` is the index of the step to satisfy next.
 *
 * @class Matcher
 */
var Matcher = /** @class */ (function () {
    /**
     * Creates an instance of Matcher.
     * @param {string} selector
     *
     * @memberof Matcher
     */
    function Matcher(selector) {
        this.nextMatch = 0;
        // Split on whitespace runs so repeated or surrounding spaces do not
        // produce empty tokens (an empty token matches every element).
        var tokens = selector.trim().split(/\s+/);
        this.matchers = tokens.map(function (matcher) {
            // Own-property check: a token such as "constructor" must not be
            // answered by something inherited from Object.prototype.
            if (Object.prototype.hasOwnProperty.call(pMatchFunctionCache, matcher)) {
                return pMatchFunctionCache[matcher];
            }
            var parts = matcher.split('.');
            var tagName = parts[0];
            var classes = parts.slice(1).sort();
            // Code string selecting the predicate in functionCache.
            var function_name = 'f';
            var attr_key = "";
            var value = "";
            if (tagName && tagName != '*') {
                var attrMatch = void 0;
                if (tagName[0] == '#') {
                    function_name += '1'; // id check
                }
                else if (attrMatch = tagName.match(/^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/)) {
                    // Groups: 1 = key, 7 = quoted value, 8 = bare value. The
                    // captured operator (group 2) is not stored on the descriptor.
                    attr_key = attrMatch[1];
                    value = attrMatch[7] || attrMatch[8];
                    function_name += '2'; // attribute check
                }
                else {
                    function_name += '3'; // tag name check
                }
            }
            if (classes.length > 0) {
                function_name += '4'; // class list check
            }
            function_name += '5'; // always appended
            var obj = {
                func: functionCache[function_name],
                tagName: tagName || "",
                classes: classes,
                attr_key: attr_key || "",
                value: value || ""
            };
            pMatchFunctionCache[matcher] = obj;
            return obj;
        });
    }
    /**
     * Trying to advance match pointer
     * @param {HTMLElement} el element to make the match
     * @return {bool} true when pointer advanced.
     */
    Matcher.prototype.advance = function (el) {
        if (this.nextMatch >= this.matchers.length) {
            return false;
        }
        var step = this.matchers[this.nextMatch];
        if (step.func(el, step.tagName, step.classes, step.attr_key, step.value)) {
            this.nextMatch++;
            return true;
        }
        return false;
    };
    /**
     * Rewind the match pointer by one step.
     */
    Matcher.prototype.rewind = function () {
        this.nextMatch--;
    };
    Object.defineProperty(Matcher.prototype, "matched", {
        /**
         * Trying to determine if match made.
         * @return {bool} true when every step of the selector has been satisfied
         */
        get: function () {
            return this.nextMatch === this.matchers.length;
        },
        enumerable: true,
        configurable: true
    });
    /**
     * Reset the match pointer to the first step.
     */
    Matcher.prototype.reset = function () {
        this.nextMatch = 0;
    };
    /**
     * flush cache to free memory
     */
    Matcher.prototype.flushCache = function () {
        pMatchFunctionCache = {};
    };
    return Matcher;
}());
exports.Matcher = Matcher;
var back_1 = __importDefault(require("./back"));
var comment_1 = __importDefault(require("./nodes/comment"));
var html_1 = require("./nodes/html");
exports.HTMLElement = html_1.default;
var html_2 = __importDefault(require("./nodes/html"));
var text_1 = __importDefault(require("./nodes/text"));
var node_1 = require("./nodes/node");
exports.Node = node_1.default;
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name

@@ -922,7 +75,7 @@ var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;

function parse(data, options) {
var root = new HTMLElement(null, {});
if (options === void 0) { options = {}; }
var root = new html_2.default(null, {});
var currentParent = root;
var stack = [root];
var lastTextPos = -1;
options = options || {};
var match;

@@ -934,7 +87,7 @@ while (match = kMarkupPattern.exec(data)) {

var text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
currentParent.appendChild(new TextNode(text));
currentParent.appendChild(new text_1.default(text));
}
}
lastTextPos = kMarkupPattern.lastIndex;
if (match[0][1] == '!') {
if (match[0][1] === '!') {
// this is a comment

@@ -944,3 +97,3 @@ if (options.comment) {

var text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
currentParent.appendChild(new CommentNode(text));
currentParent.appendChild(new comment_1.default(text));
}

@@ -957,9 +110,10 @@ continue;

}
if (!match[4] && kElementsClosedByOpening[currentParent.tagName]) {
if (kElementsClosedByOpening[currentParent.tagName][match[2]]) {
var tagName = currentParent.tagName;
if (!match[4] && kElementsClosedByOpening[tagName]) {
if (kElementsClosedByOpening[tagName][match[2]]) {
stack.pop();
currentParent = arr_back(stack);
currentParent = back_1.default(stack);
}
}
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
currentParent = currentParent.appendChild(new html_2.default(match[2], attrs, match[3]));
stack.push(currentParent);

@@ -972,3 +126,3 @@ if (kBlockTextElements[match[2]]) {

var text = void 0;
if (index == -1) {
if (index === -1) {
// there is no matching ending for the text element.

@@ -981,6 +135,6 @@ text = data.substr(kMarkupPattern.lastIndex);

if (text.length > 0) {
currentParent.appendChild(new TextNode(text));
currentParent.appendChild(new text_1.default(text));
}
}
if (index == -1) {
if (index === -1) {
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;

@@ -998,13 +152,14 @@ }

while (true) {
if (currentParent.tagName == match[2]) {
if (currentParent.tagName === match[2]) {
stack.pop();
currentParent = arr_back(stack);
currentParent = back_1.default(stack);
break;
}
else {
var tagName = currentParent.tagName;
// Trying to close current tag, and move on
if (kElementsClosedByClosing[currentParent.tagName]) {
if (kElementsClosedByClosing[currentParent.tagName][match[2]]) {
if (kElementsClosedByClosing[tagName]) {
if (kElementsClosedByClosing[tagName][match[2]]) {
stack.pop();
currentParent = arr_back(stack);
currentParent = back_1.default(stack);
continue;

@@ -1026,3 +181,3 @@ }

var last = stack.pop();
var oneBefore = arr_back(stack);
var oneBefore = back_1.default(stack);
if (last.parentNode && last.parentNode.parentNode) {

@@ -1053,3 +208,3 @@ if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {

response.childNodes.forEach(function (node) {
if (node instanceof HTMLElement) {
if (node instanceof html_2.default) {
node.parentNode = null;

@@ -1061,3 +216,3 @@ }

else {
var response = new TextNode(data);
var response = new text_1.default(data);
response.valid = valid;

@@ -1068,2 +223,3 @@ return response;

exports.parse = parse;
exports.default = parse;
});
{
"name": "node-html-parser",
"version": "1.1.20",
"version": "1.2.0",
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",

@@ -9,6 +9,7 @@ "main": "dist/index.js",

"test": "mocha",
"lint": "eslint ./src/*.ts ./src/**/*.ts",
"clean": "del-cli ./dist/",
"ts:cjs": "tsc -m commonjs",
"ts:umd": "tsc -t es5 -m umd -d false --outDir ./dist/umd/",
"build": "npm run clean && npm run ts:cjs && npm run ts:umd",
"build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:umd",
"dev": "tsc -w",

@@ -29,2 +30,5 @@ "pretest": "tsc -m commonjs"

"devDependencies": {
"@typescript-eslint/eslint-plugin": "latest",
"@typescript-eslint/eslint-plugin-tslint": "latest",
"@typescript-eslint/parser": "latest",
"@types/entities": "latest",

@@ -35,2 +39,3 @@ "@types/he": "latest",

"del-cli": "latest",
"eslint": "latest",
"mocha": "latest",

@@ -40,2 +45,3 @@ "should": "latest",

"travis-cov": "latest",
"tslint": "latest",
"typescript": "next"

@@ -42,0 +48,0 @@ },

@@ -145,2 +145,10 @@ # Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://travis-ci.org/taoqf/node-html-parser.svg?branch=master)](https://travis-ci.org/taoqf/node-html-parser)

### HTMLElement#setAttribute(key: string, value: string | null)
Set the `key` attribute to `value`. If `value` is null or undefined, the attribute is removed instead.
### HTMLElement#setAttributes(attributes: Attributes)
Replace all current attributes with the provided attribute set.
### HTMLElement#toString()

@@ -154,3 +162,4 @@ Same as [outerHTML](#htmlelementouterhtml)

Get outerHTML.
### HTMLElement#set_content(content: string | Node | Node[])
Set content. **Notice**: Do not set content of the **root** node.

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc