node-html-parser
Advanced tools
Comparing version 1.2.20 to 1.2.21
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.NodeType = exports.TextNode = exports.Node = exports.default = exports.parse = exports.HTMLElement = exports.CommentNode = void 0; | ||
var comment_1 = require("./nodes/comment"); | ||
exports.CommentNode = comment_1.default; | ||
Object.defineProperty(exports, "CommentNode", { enumerable: true, get: function () { return __importDefault(comment_1).default; } }); | ||
var html_1 = require("./nodes/html"); | ||
exports.HTMLElement = html_1.default; | ||
exports.parse = html_1.parse; | ||
exports.default = html_1.parse; | ||
Object.defineProperty(exports, "HTMLElement", { enumerable: true, get: function () { return __importDefault(html_1).default; } }); | ||
Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return html_1.parse; } }); | ||
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return html_1.parse; } }); | ||
var node_1 = require("./nodes/node"); | ||
exports.Node = node_1.default; | ||
Object.defineProperty(exports, "Node", { enumerable: true, get: function () { return __importDefault(node_1).default; } }); | ||
var text_1 = require("./nodes/text"); | ||
exports.TextNode = text_1.default; | ||
Object.defineProperty(exports, "TextNode", { enumerable: true, get: function () { return __importDefault(text_1).default; } }); | ||
var type_1 = require("./nodes/type"); | ||
exports.NodeType = type_1.default; | ||
Object.defineProperty(exports, "NodeType", { enumerable: true, get: function () { return __importDefault(type_1).default; } }); |
298
dist/main.js
@@ -5,3 +5,3 @@ var __extends = (this && this.__extends) || (function () { | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
@@ -18,2 +18,9 @@ }; | ||
}; | ||
var __spreadArrays = (this && this.__spreadArrays) || function () { | ||
for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length; | ||
for (var r = Array(s), k = 0, i = 0; i < il; i++) | ||
for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++) | ||
r[k] = a[j]; | ||
return r; | ||
}; | ||
define("back", ["require", "exports"], function (require, exports) { | ||
@@ -48,2 +55,9 @@ "use strict"; | ||
} | ||
Object.defineProperty(Node.prototype, "innerText", { | ||
get: function () { | ||
return this.rawText; | ||
}, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
return Node; | ||
@@ -60,4 +74,5 @@ }()); | ||
__extends(CommentNode, _super); | ||
function CommentNode(value) { | ||
function CommentNode(rawText) { | ||
var _this = _super.call(this) || this; | ||
_this.rawText = rawText; | ||
/** | ||
@@ -68,3 +83,2 @@ * Node Type declaration. | ||
_this.nodeType = type_1.default.COMMENT_NODE; | ||
_this.rawText = value; | ||
return _this; | ||
@@ -80,3 +94,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -102,4 +116,5 @@ }); | ||
__extends(TextNode, _super); | ||
function TextNode(value) { | ||
function TextNode(rawText) { | ||
var _this = _super.call(this) || this; | ||
_this.rawText = rawText; | ||
/** | ||
@@ -110,3 +125,2 @@ * Node Type declaration. | ||
_this.nodeType = type_2.default.TEXT_NODE; | ||
_this.rawText = value; | ||
return _this; | ||
@@ -122,3 +136,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -134,3 +148,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -153,2 +167,11 @@ }); | ||
var pMatchFunctionCache = {}; | ||
function compare_tagname(tag1, tag2) { | ||
if (!tag1) { | ||
return !tag2; | ||
} | ||
if (!tag2) { | ||
return !tag1; | ||
} | ||
return tag1.toLowerCase() === tag2.toLowerCase(); | ||
} | ||
/** | ||
@@ -253,3 +276,3 @@ * Function cache | ||
classes = classes || []; | ||
if (el.tagName !== tagName) { | ||
if (!compare_tagname(el.tagName, tagName)) { | ||
return false; | ||
@@ -267,3 +290,3 @@ } | ||
tagName = tagName || ''; | ||
return el.tagName === tagName; | ||
return compare_tagname(el.tagName, tagName); | ||
}, | ||
@@ -273,5 +296,6 @@ f3: function (el, tagName) { | ||
tagName = tagName || ''; | ||
if (el.tagName !== tagName) { | ||
return false; | ||
} | ||
// if (el.tagName !== tagName) { | ||
// return false; | ||
// } | ||
return compare_tagname(el.tagName, tagName); | ||
} | ||
@@ -293,6 +317,6 @@ }; | ||
this.nextMatch = 0; | ||
functionCache.f5 = functionCache.f5; | ||
this.matchers = selector.split(' ').map(function (matcher) { | ||
if (pMatchFunctionCache[matcher]) | ||
if (pMatchFunctionCache[matcher]) { | ||
return pMatchFunctionCache[matcher]; | ||
} | ||
var parts = matcher.split('.'); | ||
@@ -317,2 +341,3 @@ var tagName = parts[0]; | ||
if (method !== '=' && method !== '!=') { | ||
// eslint-disable-next-line no-template-curly-in-string | ||
throw new Error('Selector not supported, Expect [key${op}value].op must be =,!='); | ||
@@ -327,3 +352,3 @@ } | ||
} | ||
else if (reg = /^\[(.*?)\]/.exec(tagName)) { | ||
else if ((reg = /^\[(.*?)\]/.exec(tagName))) { | ||
attr_key = reg[1]; | ||
@@ -352,3 +377,3 @@ function_name += '5'; | ||
// source = source || ''; | ||
return pMatchFunctionCache[matcher] = obj; | ||
return (pMatchFunctionCache[matcher] = obj); | ||
}); | ||
@@ -383,3 +408,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -407,2 +432,3 @@ }); | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.parse = void 0; | ||
node_3 = __importDefault(node_3); | ||
@@ -415,12 +441,12 @@ type_3 = __importDefault(type_3); | ||
var kBlockElements = { | ||
div: true, | ||
p: true, | ||
DIV: true, | ||
P: true, | ||
// ul: true, | ||
// ol: true, | ||
li: true, | ||
LI: true, | ||
// table: true, | ||
// tr: true, | ||
td: true, | ||
section: true, | ||
br: true | ||
TD: true, | ||
SECTION: true, | ||
BR: true | ||
}; | ||
@@ -449,3 +475,2 @@ /** | ||
var _this = _super.call(this) || this; | ||
_this.tagName = tagName; | ||
_this.rawAttrs = rawAttrs; | ||
@@ -458,2 +483,3 @@ _this.parentNode = parentNode; | ||
_this.nodeType = type_3.default.ELEMENT_NODE; | ||
_this._tag_name = tagName; | ||
_this.rawAttrs = rawAttrs || ''; | ||
@@ -506,2 +532,9 @@ _this.parentNode = parentNode || null; | ||
}; | ||
Object.defineProperty(HTMLElement.prototype, "tagName", { | ||
get: function () { | ||
return this._tag_name ? this._tag_name.toUpperCase() : this._tag_name; | ||
}, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
Object.defineProperty(HTMLElement.prototype, "rawText", { | ||
@@ -514,6 +547,6 @@ /** | ||
return this.childNodes.reduce(function (pre, cur) { | ||
return pre += cur.rawText; | ||
return (pre += cur.rawText); | ||
}, ''); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -529,3 +562,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -564,3 +597,3 @@ }); | ||
if (currentBlock.prependWhitespace) { | ||
text = ' ' + text; | ||
text = " " + text; | ||
currentBlock.prependWhitespace = false; | ||
@@ -573,4 +606,3 @@ } | ||
dfs(this); | ||
return blocks | ||
.map(function (block) { | ||
return blocks.map(function (block) { | ||
// Normalize each line's whitespace | ||
@@ -581,20 +613,16 @@ return block.join('').trim().replace(/\s{2,}/g, ' '); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
HTMLElement.prototype.toString = function () { | ||
var tag = this.tagName; | ||
var tag = this._tag_name; | ||
if (tag) { | ||
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag); | ||
var attrs = this.rawAttrs ? ' ' + this.rawAttrs : ''; | ||
var attrs = this.rawAttrs ? " " + this.rawAttrs : ''; | ||
if (is_void) { | ||
return "<" + tag + attrs + ">"; | ||
} | ||
else { | ||
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">"; | ||
} | ||
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">"; | ||
} | ||
else { | ||
return this.innerHTML; | ||
} | ||
return this.innerHTML; | ||
}; | ||
@@ -607,3 +635,3 @@ Object.defineProperty(HTMLElement.prototype, "innerHTML", { | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -626,3 +654,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -664,5 +692,5 @@ }); | ||
function dfs(node) { | ||
var idStr = node.id ? ('#' + node.id) : ''; | ||
var classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : ''; | ||
write(node.tagName + idStr + classStr); | ||
var idStr = node.id ? ("#" + node.id) : ''; | ||
var classStr = node.classNames.length ? ("." + node.classNames.join('.')) : ''; | ||
write(node._tag_name + idStr + classStr); | ||
indention++; | ||
@@ -674,4 +702,5 @@ node.childNodes.forEach(function (childNode) { | ||
else if (childNode.nodeType === type_3.default.TEXT_NODE) { | ||
if (!childNode.isWhitespace) | ||
if (!childNode.isWhitespace) { | ||
write('#text'); | ||
} | ||
} | ||
@@ -684,3 +713,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -812,4 +841,5 @@ }); | ||
else { | ||
if (state[2]) | ||
if (state[2]) { | ||
matcher.rewind(); | ||
} | ||
stack.pop(); | ||
@@ -842,3 +872,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -854,3 +884,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -875,3 +905,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -885,9 +915,10 @@ }); | ||
get: function () { | ||
if (this._rawAttrs) | ||
if (this._rawAttrs) { | ||
return this._rawAttrs; | ||
} | ||
var attrs = {}; | ||
if (this.rawAttrs) { | ||
var re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig; | ||
var re = /\b([a-z][a-z0-9-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig; | ||
var match = void 0; | ||
while (match = re.exec(this.rawAttrs)) { | ||
while ((match = re.exec(this.rawAttrs))) { | ||
attrs[match[1]] = match[2] || match[3] || match[4] || null; | ||
@@ -899,3 +930,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -916,5 +947,3 @@ }); | ||
} | ||
else { | ||
return name + '=' + val; | ||
} | ||
return name + "=" + val; | ||
}).join(' '); | ||
@@ -952,5 +981,3 @@ }; | ||
} | ||
else { | ||
return name + '=' + val; | ||
} | ||
return name + "=" + val; | ||
}).join(' '); | ||
@@ -977,9 +1004,7 @@ }; | ||
} | ||
else { | ||
return name + '=' + JSON.stringify(String(val)); | ||
} | ||
return name + "=" + JSON.stringify(String(val)); | ||
}).join(' '); | ||
}; | ||
HTMLElement.prototype.insertAdjacentHTML = function (where, html) { | ||
var _a, _b; | ||
var _a, _b, _c; | ||
var _this = this; | ||
@@ -991,8 +1016,14 @@ if (arguments.length < 2) { | ||
if (where === 'afterend') { | ||
var idx = this.parentNode.childNodes.findIndex(function (child) { | ||
return child === _this; | ||
}); | ||
(_a = this.parentNode.childNodes).splice.apply(_a, __spreadArrays([idx + 1, 0], p.childNodes)); | ||
p.childNodes.forEach(function (n) { | ||
_this.parentNode.appendChild(n); | ||
if (n instanceof HTMLElement) { | ||
n.parentNode = _this.parentNode; | ||
} | ||
}); | ||
} | ||
else if (where === 'afterbegin') { | ||
(_a = this.childNodes).unshift.apply(_a, p.childNodes); | ||
(_b = this.childNodes).unshift.apply(_b, p.childNodes); | ||
} | ||
@@ -1005,3 +1036,11 @@ else if (where === 'beforeend') { | ||
else if (where === 'beforebegin') { | ||
(_b = this.parentNode.childNodes).unshift.apply(_b, p.childNodes); | ||
var idx = this.parentNode.childNodes.findIndex(function (child) { | ||
return child === _this; | ||
}); | ||
(_c = this.parentNode.childNodes).splice.apply(_c, __spreadArrays([idx, 0], p.childNodes)); | ||
p.childNodes.forEach(function (n) { | ||
if (n instanceof HTMLElement) { | ||
n.parentNode = _this.parentNode; | ||
} | ||
}); | ||
} | ||
@@ -1011,5 +1050,5 @@ else { | ||
} | ||
if (!where || html === undefined || html === null) { | ||
return; | ||
} | ||
// if (!where || html === undefined || html === null) { | ||
// return; | ||
// } | ||
}; | ||
@@ -1024,39 +1063,71 @@ return HTMLElement; | ||
area: true, | ||
AREA: true, | ||
base: true, | ||
BASE: true, | ||
br: true, | ||
BR: true, | ||
col: true, | ||
COL: true, | ||
hr: true, | ||
HR: true, | ||
img: true, | ||
IMG: true, | ||
input: true, | ||
INPUT: true, | ||
link: true, | ||
LINK: true, | ||
meta: true, | ||
source: true | ||
META: true, | ||
source: true, | ||
SOURCE: true | ||
}; | ||
var kElementsClosedByOpening = { | ||
li: { li: true }, | ||
p: { p: true, div: true }, | ||
b: { div: true }, | ||
td: { td: true, th: true }, | ||
th: { td: true, th: true }, | ||
h1: { h1: true }, | ||
h2: { h2: true }, | ||
h3: { h3: true }, | ||
h4: { h4: true }, | ||
h5: { h5: true }, | ||
h6: { h6: true } | ||
li: { li: true, LI: true }, | ||
LI: { li: true, LI: true }, | ||
p: { p: true, div: true, P: true, DIV: true }, | ||
P: { p: true, div: true, P: true, DIV: true }, | ||
b: { div: true, DIV: true }, | ||
B: { div: true, DIV: true }, | ||
td: { td: true, th: true, TD: true, TH: true }, | ||
TD: { td: true, th: true, TD: true, TH: true }, | ||
th: { td: true, th: true, TD: true, TH: true }, | ||
TH: { td: true, th: true, TD: true, TH: true }, | ||
h1: { h1: true, H1: true }, | ||
H1: { h1: true, H1: true }, | ||
h2: { h2: true, H2: true }, | ||
H2: { h2: true, H2: true }, | ||
h3: { h3: true, H3: true }, | ||
H3: { h3: true, H3: true }, | ||
h4: { h4: true, H4: true }, | ||
H4: { h4: true, H4: true }, | ||
h5: { h5: true, H5: true }, | ||
H5: { h5: true, H5: true }, | ||
h6: { h6: true, H6: true }, | ||
H6: { h6: true, H6: true } | ||
}; | ||
var kElementsClosedByClosing = { | ||
li: { ul: true, ol: true }, | ||
a: { div: true }, | ||
b: { div: true }, | ||
i: { div: true }, | ||
p: { div: true }, | ||
td: { tr: true, table: true }, | ||
th: { tr: true, table: true } | ||
li: { ul: true, ol: true, UL: true, OL: true }, | ||
LI: { ul: true, ol: true, UL: true, OL: true }, | ||
a: { div: true, DIV: true }, | ||
A: { div: true, DIV: true }, | ||
b: { div: true, DIV: true }, | ||
B: { div: true, DIV: true }, | ||
i: { div: true, DIV: true }, | ||
I: { div: true, DIV: true }, | ||
p: { div: true, DIV: true }, | ||
P: { div: true, DIV: true }, | ||
td: { tr: true, table: true, TR: true, TABLE: true }, | ||
TD: { tr: true, table: true, TR: true, TABLE: true }, | ||
th: { tr: true, table: true, TR: true, TABLE: true }, | ||
TH: { tr: true, table: true, TR: true, TABLE: true } | ||
}; | ||
var kBlockTextElements = { | ||
script: true, | ||
SCRIPT: true, | ||
noscript: true, | ||
NOSCRIPT: true, | ||
style: true, | ||
pre: true | ||
STYLE: true, | ||
pre: true, | ||
PRE: true | ||
}; | ||
@@ -1100,3 +1171,3 @@ var frameflag = 'documentfragmentcontainer'; | ||
var attrs = {}; | ||
for (var attMatch = void 0; attMatch = kAttributePattern.exec(match[3]);) { | ||
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) { | ||
attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6]; | ||
@@ -1117,3 +1188,3 @@ } | ||
// a little test to find next </script> or </style> ... | ||
var closeMarkup_1 = '</' + match[2] + '>'; | ||
var closeMarkup_1 = "</" + match[2] + ">"; | ||
var index = (function () { | ||
@@ -1123,5 +1194,3 @@ if (options.lowerCaseTagName) { | ||
} | ||
else { | ||
return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex); | ||
} | ||
return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex); | ||
})(); | ||
@@ -1153,3 +1222,3 @@ if (options[match[2]]) { | ||
while (true) { | ||
if (currentParent.tagName === match[2]) { | ||
if (currentParent.tagName === match[2].toUpperCase()) { | ||
stack.pop(); | ||
@@ -1175,9 +1244,9 @@ currentParent = back_1.default(stack); | ||
}; | ||
while (match = kMarkupPattern.exec(data)) { | ||
while ((match = kMarkupPattern.exec(data))) { | ||
_loop_1(); | ||
} | ||
var valid = !!(stack.length === 1); | ||
var valid = Boolean(stack.length === 1); | ||
if (!options.noFix) { | ||
var response = root; | ||
response.valid = valid; | ||
var response_1 = root; | ||
response_1.valid = valid; | ||
var _loop_2 = function () { | ||
@@ -1211,3 +1280,3 @@ // Handle each error elements. | ||
} | ||
response.childNodes.forEach(function (node) { | ||
response_1.childNodes.forEach(function (node) { | ||
if (node instanceof HTMLElement) { | ||
@@ -1217,9 +1286,7 @@ node.parentNode = null; | ||
}); | ||
return response; | ||
return response_1; | ||
} | ||
else { | ||
var response = new text_1.default(data); | ||
response.valid = valid; | ||
return response; | ||
} | ||
var response = new text_1.default(data); | ||
response.valid = valid; | ||
return response; | ||
} | ||
@@ -1231,9 +1298,10 @@ exports.parse = parse; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.CommentNode = comment_2.default; | ||
exports.HTMLElement = html_1.default; | ||
exports.parse = html_1.parse; | ||
exports.default = html_1.parse; | ||
exports.Node = node_4.default; | ||
exports.TextNode = text_2.default; | ||
exports.NodeType = type_4.default; | ||
exports.NodeType = exports.TextNode = exports.Node = exports.default = exports.parse = exports.HTMLElement = exports.CommentNode = void 0; | ||
Object.defineProperty(exports, "CommentNode", { enumerable: true, get: function () { return __importDefault(comment_2).default; } }); | ||
Object.defineProperty(exports, "HTMLElement", { enumerable: true, get: function () { return __importDefault(html_1).default; } }); | ||
Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return html_1.parse; } }); | ||
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return html_1.parse; } }); | ||
Object.defineProperty(exports, "Node", { enumerable: true, get: function () { return __importDefault(node_4).default; } }); | ||
Object.defineProperty(exports, "TextNode", { enumerable: true, get: function () { return __importDefault(text_2).default; } }); | ||
Object.defineProperty(exports, "NodeType", { enumerable: true, get: function () { return __importDefault(type_4).default; } }); | ||
}); |
@@ -8,2 +8,11 @@ "use strict"; | ||
var pMatchFunctionCache = {}; | ||
function compare_tagname(tag1, tag2) { | ||
if (!tag1) { | ||
return !tag2; | ||
} | ||
if (!tag2) { | ||
return !tag1; | ||
} | ||
return tag1.toLowerCase() === tag2.toLowerCase(); | ||
} | ||
/** | ||
@@ -108,3 +117,3 @@ * Function cache | ||
classes = classes || []; | ||
if (el.tagName !== tagName) { | ||
if (!compare_tagname(el.tagName, tagName)) { | ||
return false; | ||
@@ -122,3 +131,3 @@ } | ||
tagName = tagName || ''; | ||
return el.tagName === tagName; | ||
return compare_tagname(el.tagName, tagName); | ||
}, | ||
@@ -128,5 +137,6 @@ f3: function (el, tagName) { | ||
tagName = tagName || ''; | ||
if (el.tagName !== tagName) { | ||
return false; | ||
} | ||
// if (el.tagName !== tagName) { | ||
// return false; | ||
// } | ||
return compare_tagname(el.tagName, tagName); | ||
} | ||
@@ -148,6 +158,6 @@ }; | ||
this.nextMatch = 0; | ||
functionCache.f5 = functionCache.f5; | ||
this.matchers = selector.split(' ').map(function (matcher) { | ||
if (pMatchFunctionCache[matcher]) | ||
if (pMatchFunctionCache[matcher]) { | ||
return pMatchFunctionCache[matcher]; | ||
} | ||
var parts = matcher.split('.'); | ||
@@ -172,2 +182,3 @@ var tagName = parts[0]; | ||
if (method !== '=' && method !== '!=') { | ||
// eslint-disable-next-line no-template-curly-in-string | ||
throw new Error('Selector not supported, Expect [key${op}value].op must be =,!='); | ||
@@ -182,3 +193,3 @@ } | ||
} | ||
else if (reg = /^\[(.*?)\]/.exec(tagName)) { | ||
else if ((reg = /^\[(.*?)\]/.exec(tagName))) { | ||
attr_key = reg[1]; | ||
@@ -207,3 +218,3 @@ function_name += '5'; | ||
// source = source || ''; | ||
return pMatchFunctionCache[matcher] = obj; | ||
return (pMatchFunctionCache[matcher] = obj); | ||
}); | ||
@@ -238,3 +249,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -241,0 +252,0 @@ }); |
import Node from './node'; | ||
import NodeType from './type'; | ||
export default class CommentNode extends Node { | ||
constructor(value: string); | ||
rawText: string; | ||
constructor(rawText: string); | ||
/** | ||
@@ -6,0 +7,0 @@ * Node Type declaration. |
@@ -6,3 +6,3 @@ "use strict"; | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
@@ -24,4 +24,5 @@ }; | ||
__extends(CommentNode, _super); | ||
function CommentNode(value) { | ||
function CommentNode(rawText) { | ||
var _this = _super.call(this) || this; | ||
_this.rawText = rawText; | ||
/** | ||
@@ -32,3 +33,2 @@ * Node Type declaration. | ||
_this.nodeType = type_1.default.COMMENT_NODE; | ||
_this.rawText = value; | ||
return _this; | ||
@@ -44,3 +44,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -47,0 +47,0 @@ }); |
@@ -26,3 +26,2 @@ import Node from './node'; | ||
export default class HTMLElement extends Node { | ||
tagName: string; | ||
private rawAttrs; | ||
@@ -32,2 +31,3 @@ parentNode: Node; | ||
private _rawAttrs; | ||
private _tag_name; | ||
id: string; | ||
@@ -58,2 +58,3 @@ classNames: string[]; | ||
exchangeChild(oldNode: Node, newNode: Node): void; | ||
get tagName(): string; | ||
/** | ||
@@ -60,0 +61,0 @@ * Get escpaed (as-it) text value of current node and its children. |
@@ -6,3 +6,3 @@ "use strict"; | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
@@ -16,2 +16,9 @@ }; | ||
})(); | ||
var __spreadArrays = (this && this.__spreadArrays) || function () { | ||
for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length; | ||
for (var r = Array(s), k = 0, i = 0; i < il; i++) | ||
for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++) | ||
r[k] = a[j]; | ||
return r; | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
@@ -21,2 +28,3 @@ return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.parse = void 0; | ||
var he_1 = require("he"); | ||
@@ -30,12 +38,12 @@ var node_1 = __importDefault(require("./node")); | ||
var kBlockElements = { | ||
div: true, | ||
p: true, | ||
DIV: true, | ||
P: true, | ||
// ul: true, | ||
// ol: true, | ||
li: true, | ||
LI: true, | ||
// table: true, | ||
// tr: true, | ||
td: true, | ||
section: true, | ||
br: true | ||
TD: true, | ||
SECTION: true, | ||
BR: true | ||
}; | ||
@@ -64,3 +72,2 @@ /** | ||
var _this = _super.call(this) || this; | ||
_this.tagName = tagName; | ||
_this.rawAttrs = rawAttrs; | ||
@@ -73,2 +80,3 @@ _this.parentNode = parentNode; | ||
_this.nodeType = type_1.default.ELEMENT_NODE; | ||
_this._tag_name = tagName; | ||
_this.rawAttrs = rawAttrs || ''; | ||
@@ -121,2 +129,9 @@ _this.parentNode = parentNode || null; | ||
}; | ||
Object.defineProperty(HTMLElement.prototype, "tagName", { | ||
get: function () { | ||
return this._tag_name ? this._tag_name.toUpperCase() : this._tag_name; | ||
}, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
Object.defineProperty(HTMLElement.prototype, "rawText", { | ||
@@ -129,6 +144,6 @@ /** | ||
return this.childNodes.reduce(function (pre, cur) { | ||
return pre += cur.rawText; | ||
return (pre += cur.rawText); | ||
}, ''); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -144,3 +159,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -179,3 +194,3 @@ }); | ||
if (currentBlock.prependWhitespace) { | ||
text = ' ' + text; | ||
text = " " + text; | ||
currentBlock.prependWhitespace = false; | ||
@@ -188,4 +203,3 @@ } | ||
dfs(this); | ||
return blocks | ||
.map(function (block) { | ||
return blocks.map(function (block) { | ||
// Normalize each line's whitespace | ||
@@ -196,20 +210,16 @@ return block.join('').trim().replace(/\s{2,}/g, ' '); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
HTMLElement.prototype.toString = function () { | ||
var tag = this.tagName; | ||
var tag = this._tag_name; | ||
if (tag) { | ||
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag); | ||
var attrs = this.rawAttrs ? ' ' + this.rawAttrs : ''; | ||
var attrs = this.rawAttrs ? " " + this.rawAttrs : ''; | ||
if (is_void) { | ||
return "<" + tag + attrs + ">"; | ||
} | ||
else { | ||
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">"; | ||
} | ||
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">"; | ||
} | ||
else { | ||
return this.innerHTML; | ||
} | ||
return this.innerHTML; | ||
}; | ||
@@ -222,3 +232,3 @@ Object.defineProperty(HTMLElement.prototype, "innerHTML", { | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -241,3 +251,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -279,5 +289,5 @@ }); | ||
function dfs(node) { | ||
var idStr = node.id ? ('#' + node.id) : ''; | ||
var classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : ''; | ||
write(node.tagName + idStr + classStr); | ||
var idStr = node.id ? ("#" + node.id) : ''; | ||
var classStr = node.classNames.length ? ("." + node.classNames.join('.')) : ''; | ||
write(node._tag_name + idStr + classStr); | ||
indention++; | ||
@@ -289,4 +299,5 @@ node.childNodes.forEach(function (childNode) { | ||
else if (childNode.nodeType === type_1.default.TEXT_NODE) { | ||
if (!childNode.isWhitespace) | ||
if (!childNode.isWhitespace) { | ||
write('#text'); | ||
} | ||
} | ||
@@ -299,3 +310,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -427,4 +438,5 @@ }); | ||
else { | ||
if (state[2]) | ||
if (state[2]) { | ||
matcher.rewind(); | ||
} | ||
stack.pop(); | ||
@@ -457,3 +469,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -469,3 +481,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -490,3 +502,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -500,9 +512,10 @@ }); | ||
get: function () { | ||
if (this._rawAttrs) | ||
if (this._rawAttrs) { | ||
return this._rawAttrs; | ||
} | ||
var attrs = {}; | ||
if (this.rawAttrs) { | ||
var re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig; | ||
var re = /\b([a-z][a-z0-9-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig; | ||
var match = void 0; | ||
while (match = re.exec(this.rawAttrs)) { | ||
while ((match = re.exec(this.rawAttrs))) { | ||
attrs[match[1]] = match[2] || match[3] || match[4] || null; | ||
@@ -514,3 +527,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -531,5 +544,3 @@ }); | ||
} | ||
else { | ||
return name + '=' + val; | ||
} | ||
return name + "=" + val; | ||
}).join(' '); | ||
@@ -567,5 +578,3 @@ }; | ||
} | ||
else { | ||
return name + '=' + val; | ||
} | ||
return name + "=" + val; | ||
}).join(' '); | ||
@@ -592,9 +601,7 @@ }; | ||
} | ||
else { | ||
return name + '=' + JSON.stringify(String(val)); | ||
} | ||
return name + "=" + JSON.stringify(String(val)); | ||
}).join(' '); | ||
}; | ||
HTMLElement.prototype.insertAdjacentHTML = function (where, html) { | ||
var _a, _b; | ||
var _a, _b, _c; | ||
var _this = this; | ||
@@ -606,8 +613,14 @@ if (arguments.length < 2) { | ||
if (where === 'afterend') { | ||
var idx = this.parentNode.childNodes.findIndex(function (child) { | ||
return child === _this; | ||
}); | ||
(_a = this.parentNode.childNodes).splice.apply(_a, __spreadArrays([idx + 1, 0], p.childNodes)); | ||
p.childNodes.forEach(function (n) { | ||
_this.parentNode.appendChild(n); | ||
if (n instanceof HTMLElement) { | ||
n.parentNode = _this.parentNode; | ||
} | ||
}); | ||
} | ||
else if (where === 'afterbegin') { | ||
(_a = this.childNodes).unshift.apply(_a, p.childNodes); | ||
(_b = this.childNodes).unshift.apply(_b, p.childNodes); | ||
} | ||
@@ -620,3 +633,11 @@ else if (where === 'beforeend') { | ||
else if (where === 'beforebegin') { | ||
(_b = this.parentNode.childNodes).unshift.apply(_b, p.childNodes); | ||
var idx = this.parentNode.childNodes.findIndex(function (child) { | ||
return child === _this; | ||
}); | ||
(_c = this.parentNode.childNodes).splice.apply(_c, __spreadArrays([idx, 0], p.childNodes)); | ||
p.childNodes.forEach(function (n) { | ||
if (n instanceof HTMLElement) { | ||
n.parentNode = _this.parentNode; | ||
} | ||
}); | ||
} | ||
@@ -626,5 +647,5 @@ else { | ||
} | ||
if (!where || html === undefined || html === null) { | ||
return; | ||
} | ||
// if (!where || html === undefined || html === null) { | ||
// return; | ||
// } | ||
}; | ||
@@ -639,39 +660,71 @@ return HTMLElement; | ||
area: true, | ||
AREA: true, | ||
base: true, | ||
BASE: true, | ||
br: true, | ||
BR: true, | ||
col: true, | ||
COL: true, | ||
hr: true, | ||
HR: true, | ||
img: true, | ||
IMG: true, | ||
input: true, | ||
INPUT: true, | ||
link: true, | ||
LINK: true, | ||
meta: true, | ||
source: true | ||
META: true, | ||
source: true, | ||
SOURCE: true | ||
}; | ||
var kElementsClosedByOpening = { | ||
li: { li: true }, | ||
p: { p: true, div: true }, | ||
b: { div: true }, | ||
td: { td: true, th: true }, | ||
th: { td: true, th: true }, | ||
h1: { h1: true }, | ||
h2: { h2: true }, | ||
h3: { h3: true }, | ||
h4: { h4: true }, | ||
h5: { h5: true }, | ||
h6: { h6: true } | ||
li: { li: true, LI: true }, | ||
LI: { li: true, LI: true }, | ||
p: { p: true, div: true, P: true, DIV: true }, | ||
P: { p: true, div: true, P: true, DIV: true }, | ||
b: { div: true, DIV: true }, | ||
B: { div: true, DIV: true }, | ||
td: { td: true, th: true, TD: true, TH: true }, | ||
TD: { td: true, th: true, TD: true, TH: true }, | ||
th: { td: true, th: true, TD: true, TH: true }, | ||
TH: { td: true, th: true, TD: true, TH: true }, | ||
h1: { h1: true, H1: true }, | ||
H1: { h1: true, H1: true }, | ||
h2: { h2: true, H2: true }, | ||
H2: { h2: true, H2: true }, | ||
h3: { h3: true, H3: true }, | ||
H3: { h3: true, H3: true }, | ||
h4: { h4: true, H4: true }, | ||
H4: { h4: true, H4: true }, | ||
h5: { h5: true, H5: true }, | ||
H5: { h5: true, H5: true }, | ||
h6: { h6: true, H6: true }, | ||
H6: { h6: true, H6: true } | ||
}; | ||
var kElementsClosedByClosing = { | ||
li: { ul: true, ol: true }, | ||
a: { div: true }, | ||
b: { div: true }, | ||
i: { div: true }, | ||
p: { div: true }, | ||
td: { tr: true, table: true }, | ||
th: { tr: true, table: true } | ||
li: { ul: true, ol: true, UL: true, OL: true }, | ||
LI: { ul: true, ol: true, UL: true, OL: true }, | ||
a: { div: true, DIV: true }, | ||
A: { div: true, DIV: true }, | ||
b: { div: true, DIV: true }, | ||
B: { div: true, DIV: true }, | ||
i: { div: true, DIV: true }, | ||
I: { div: true, DIV: true }, | ||
p: { div: true, DIV: true }, | ||
P: { div: true, DIV: true }, | ||
td: { tr: true, table: true, TR: true, TABLE: true }, | ||
TD: { tr: true, table: true, TR: true, TABLE: true }, | ||
th: { tr: true, table: true, TR: true, TABLE: true }, | ||
TH: { tr: true, table: true, TR: true, TABLE: true } | ||
}; | ||
var kBlockTextElements = { | ||
script: true, | ||
SCRIPT: true, | ||
noscript: true, | ||
NOSCRIPT: true, | ||
style: true, | ||
pre: true | ||
STYLE: true, | ||
pre: true, | ||
PRE: true | ||
}; | ||
@@ -715,3 +768,3 @@ var frameflag = 'documentfragmentcontainer'; | ||
var attrs = {}; | ||
for (var attMatch = void 0; attMatch = kAttributePattern.exec(match[3]);) { | ||
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) { | ||
attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6]; | ||
@@ -732,3 +785,3 @@ } | ||
// a little test to find next </script> or </style> ... | ||
var closeMarkup_1 = '</' + match[2] + '>'; | ||
var closeMarkup_1 = "</" + match[2] + ">"; | ||
var index = (function () { | ||
@@ -738,5 +791,3 @@ if (options.lowerCaseTagName) { | ||
} | ||
else { | ||
return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex); | ||
} | ||
return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex); | ||
})(); | ||
@@ -768,3 +819,3 @@ if (options[match[2]]) { | ||
while (true) { | ||
if (currentParent.tagName === match[2]) { | ||
if (currentParent.tagName === match[2].toUpperCase()) { | ||
stack.pop(); | ||
@@ -790,9 +841,9 @@ currentParent = back_1.default(stack); | ||
}; | ||
while (match = kMarkupPattern.exec(data)) { | ||
while ((match = kMarkupPattern.exec(data))) { | ||
_loop_1(); | ||
} | ||
var valid = !!(stack.length === 1); | ||
var valid = Boolean(stack.length === 1); | ||
if (!options.noFix) { | ||
var response = root; | ||
response.valid = valid; | ||
var response_1 = root; | ||
response_1.valid = valid; | ||
var _loop_2 = function () { | ||
@@ -826,3 +877,3 @@ // Handle each error elements. | ||
} | ||
response.childNodes.forEach(function (node) { | ||
response_1.childNodes.forEach(function (node) { | ||
if (node instanceof HTMLElement) { | ||
@@ -832,10 +883,8 @@ node.parentNode = null; | ||
}); | ||
return response; | ||
return response_1; | ||
} | ||
else { | ||
var response = new text_1.default(data); | ||
response.valid = valid; | ||
return response; | ||
} | ||
var response = new text_1.default(data); | ||
response.valid = valid; | ||
return response; | ||
} | ||
exports.parse = parse; |
@@ -6,7 +6,8 @@ import NodeType from './type'; | ||
export default abstract class Node { | ||
nodeType: NodeType; | ||
abstract nodeType: NodeType; | ||
childNodes: Node[]; | ||
text: string; | ||
rawText: string; | ||
abstract text: string; | ||
abstract rawText: string; | ||
abstract toString(): string; | ||
get innerText(): string; | ||
} |
@@ -10,4 +10,11 @@ "use strict"; | ||
} | ||
Object.defineProperty(Node.prototype, "innerText", { | ||
get: function () { | ||
return this.rawText; | ||
}, | ||
enumerable: false, | ||
configurable: true | ||
}); | ||
return Node; | ||
}()); | ||
exports.default = Node; |
@@ -8,3 +8,4 @@ import NodeType from './type'; | ||
export default class TextNode extends Node { | ||
constructor(value: string); | ||
rawText: string; | ||
constructor(rawText: string); | ||
/** | ||
@@ -11,0 +12,0 @@ * Node Type declaration. |
@@ -6,3 +6,3 @@ "use strict"; | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
@@ -28,4 +28,5 @@ }; | ||
__extends(TextNode, _super); | ||
function TextNode(value) { | ||
function TextNode(rawText) { | ||
var _this = _super.call(this) || this; | ||
_this.rawText = rawText; | ||
/** | ||
@@ -36,3 +37,2 @@ * Node Type declaration. | ||
_this.nodeType = type_1.default.TEXT_NODE; | ||
_this.rawText = value; | ||
return _this; | ||
@@ -48,3 +48,3 @@ } | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -60,3 +60,3 @@ }); | ||
}, | ||
enumerable: true, | ||
enumerable: false, | ||
configurable: true | ||
@@ -63,0 +63,0 @@ }); |
{ | ||
"name": "node-html-parser", | ||
"version": "1.2.20", | ||
"version": "1.2.21", | ||
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.", | ||
@@ -26,3 +26,3 @@ "main": "dist/index.js", | ||
"dependencies": { | ||
"he": "1.1.1" | ||
"he": "1.2.0" | ||
}, | ||
@@ -39,2 +39,4 @@ "devDependencies": { | ||
"eslint": "latest", | ||
"eslint-config-prettier": "latest", | ||
"eslint-plugin-import": "latest", | ||
"high5": "^1.0.0", | ||
@@ -44,9 +46,7 @@ "htmlparser": "^1.7.7", | ||
"htmlparser2": "^4.1.0", | ||
"libxmljs": "^0.19.7", | ||
"mocha": "latest", | ||
"parse5": "^6.0.0", | ||
"parse5": "^6.0.1", | ||
"should": "latest", | ||
"spec": "latest", | ||
"travis-cov": "latest", | ||
"tslint": "^5.0.0", | ||
"typescript": "next" | ||
@@ -53,0 +53,0 @@ }, |
@@ -66,3 +66,3 @@ # Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://travis-ci.org/taoqf/node-html-parser.svg?branch=master)](https://travis-ci.org/taoqf/node-html-parser) | ||
## API | ||
## HTMLElement Methods | ||
@@ -86,16 +86,2 @@ ### parse(data[, options]) | ||
### HTMLElement#text | ||
Get unescaped text value of current node and its children. Like `innerText`. | ||
(slow for the first time) | ||
### HTMLElement#rawText | ||
Get escpaed (as-it) text value of current node and its children. May have | ||
`&` in it. (fast) | ||
### HTMLElement#structuredText | ||
Get structured Text | ||
### HTMLElement#trimRight() | ||
@@ -105,6 +91,2 @@ | ||
### HTMLElement#structure | ||
Get DOM structure | ||
### HTMLElement#removeWhitespace() | ||
@@ -134,10 +116,2 @@ | ||
### HTMLElement#firstChild | ||
Get first child node | ||
### HTMLElement#lastChild | ||
Get last child node | ||
### HTMLElement#setAttribute(key: string, value: string) | ||
@@ -153,4 +127,12 @@ | ||
Get `key` attrubte. | ||
Get `key` attribute. | ||
### HTMLElement#exchangeChild(oldNode: Node, newNode: Node) | ||
Exchanges given child with new child. | ||
### HTMLElement#removeChild(node: Node) | ||
Remove child node. | ||
### HTMLElement#toString() | ||
@@ -160,2 +142,35 @@ | ||
### HTMLElement#set_content(content: string | Node | Node[]) | ||
Set content. **Notice**: Do not set content of the **root** node. | ||
## HTMLElement Properties | ||
### HTMLElement#text | ||
Get unescaped text value of current node and its children. Like `innerText`. | ||
(slow for the first time) | ||
### HTMLElement#rawText | ||
Get escaped (as-is) text value of current node and its children. May have | |
`&amp;` in it. (fast) | |
### HTMLElement#structuredText | ||
Get structured Text | ||
### HTMLElement#structure | ||
Get DOM structure | ||
### HTMLElement#firstChild | ||
Get first child node | ||
### HTMLElement#lastChild | ||
Get last child node | ||
### HTMLElement#innerHTML | ||
@@ -169,4 +184,1 @@ | ||
### HTMLElement#set_content(content: string | Node | Node[]) | ||
Set content. **Notice**: Do not set content of the **root** node. |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
3040
178
115728
1