html-dom-parser
Advanced tools
Comparing version 0.3.1 to 0.4.0
@@ -5,2 +5,11 @@ # Change Log | ||
# [0.4.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.3.1...v0.4.0) (2020-12-13) | ||
### Features | ||
* upgrade `domhandler` to 3.0.0 and `htmlparser2` to 4.0.0 ([44dba5e](https://github.com/remarkablemark/html-dom-parser/commit/44dba5efb5cc89668fc064d844c09079560029e1)) | ||
## [0.3.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.3.0...v0.3.1) (2020-12-13) | ||
@@ -7,0 +16,0 @@ |
@@ -50,6 +50,214 @@ (function (global, factory) { | ||
var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; | ||
var __extends = (commonjsGlobal && commonjsGlobal.__extends) || (function () { | ||
var extendStatics = function (d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
return function (d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
}; | ||
})(); | ||
var nodeTypes = new Map([ | ||
["tag" /* Tag */, 1], | ||
["script" /* Script */, 1], | ||
["style" /* Style */, 1], | ||
["directive" /* Directive */, 1], | ||
["text" /* Text */, 3], | ||
["cdata" /* CDATA */, 4], | ||
["comment" /* Comment */, 8] | ||
]); | ||
// This object will be used as the prototype for Nodes when creating a | ||
// DOM-Level-1-compliant structure. | ||
var Node = /** @class */ (function () { | ||
/** | ||
* | ||
* @param type The type of the node. | ||
*/ | ||
function Node(type) { | ||
this.type = type; | ||
/** Parent of the node */ | ||
this.parent = null; | ||
/** Previous sibling */ | ||
this.prev = null; | ||
/** Next sibling */ | ||
this.next = null; | ||
/** The start index of the node. Requires `withStartIndices` on the handler to be `true`. */ | ||
this.startIndex = null; | ||
/** The end index of the node. Requires `withEndIndices` on the handler to be `true`. */ | ||
this.endIndex = null; | ||
} | ||
Object.defineProperty(Node.prototype, "nodeType", { | ||
// Read-only aliases | ||
get: function () { | ||
return nodeTypes.get(this.type) || 1; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
Object.defineProperty(Node.prototype, "parentNode", { | ||
// Read-write aliases for properties | ||
get: function () { | ||
return this.parent || null; | ||
}, | ||
set: function (parent) { | ||
this.parent = parent; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
Object.defineProperty(Node.prototype, "previousSibling", { | ||
get: function () { | ||
return this.prev || null; | ||
}, | ||
set: function (prev) { | ||
this.prev = prev; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
Object.defineProperty(Node.prototype, "nextSibling", { | ||
get: function () { | ||
return this.next || null; | ||
}, | ||
set: function (next) { | ||
this.next = next; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
return Node; | ||
}()); | ||
var Node_1 = Node; | ||
var DataNode = /** @class */ (function (_super) { | ||
__extends(DataNode, _super); | ||
/** | ||
* | ||
* @param type The type of the node | ||
* @param data The content of the data node | ||
*/ | ||
function DataNode(type, data) { | ||
var _this = _super.call(this, type) || this; | ||
_this.data = data; | ||
return _this; | ||
} | ||
Object.defineProperty(DataNode.prototype, "nodeValue", { | ||
get: function () { | ||
return this.data; | ||
}, | ||
set: function (data) { | ||
this.data = data; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
return DataNode; | ||
}(Node)); | ||
var DataNode_1 = DataNode; | ||
var ProcessingInstruction = /** @class */ (function (_super) { | ||
__extends(ProcessingInstruction, _super); | ||
function ProcessingInstruction(name, data) { | ||
var _this = _super.call(this, "directive" /* Directive */, data) || this; | ||
_this.name = name; | ||
return _this; | ||
} | ||
return ProcessingInstruction; | ||
}(DataNode)); | ||
var ProcessingInstruction_1 = ProcessingInstruction; | ||
var NodeWithChildren = /** @class */ (function (_super) { | ||
__extends(NodeWithChildren, _super); | ||
/** | ||
* | ||
* @param type Type of the node. | ||
* @param children Children of the node. Only certain node types can have children. | ||
*/ | ||
function NodeWithChildren(type, children) { | ||
var _this = _super.call(this, type) || this; | ||
_this.children = children; | ||
return _this; | ||
} | ||
Object.defineProperty(NodeWithChildren.prototype, "firstChild", { | ||
// Aliases | ||
get: function () { | ||
return this.children[0] || null; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
Object.defineProperty(NodeWithChildren.prototype, "lastChild", { | ||
get: function () { | ||
return this.children[this.children.length - 1] || null; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
Object.defineProperty(NodeWithChildren.prototype, "childNodes", { | ||
get: function () { | ||
return this.children; | ||
}, | ||
set: function (children) { | ||
this.children = children; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
return NodeWithChildren; | ||
}(Node)); | ||
var NodeWithChildren_1 = NodeWithChildren; | ||
var Element = /** @class */ (function (_super) { | ||
__extends(Element, _super); | ||
/** | ||
* | ||
* @param name Name of the tag, eg. `div`, `span` | ||
* @param attribs Object mapping attribute names to attribute values | ||
*/ | ||
function Element(name, attribs) { | ||
var _this = _super.call(this, name === "script" | ||
? "script" /* Script */ | ||
: name === "style" | ||
? "style" /* Style */ | ||
: "tag" /* Tag */, []) || this; | ||
_this.name = name; | ||
_this.attribs = attribs; | ||
_this.attribs = attribs; | ||
return _this; | ||
} | ||
Object.defineProperty(Element.prototype, "tagName", { | ||
// DOM Level 1 aliases | ||
get: function () { | ||
return this.name; | ||
}, | ||
set: function (name) { | ||
this.name = name; | ||
}, | ||
enumerable: true, | ||
configurable: true | ||
}); | ||
return Element; | ||
}(NodeWithChildren)); | ||
var Element_1 = Element; | ||
var node = /*#__PURE__*/Object.defineProperty({ | ||
Node: Node_1, | ||
DataNode: DataNode_1, | ||
ProcessingInstruction: ProcessingInstruction_1, | ||
NodeWithChildren: NodeWithChildren_1, | ||
Element: Element_1 | ||
}, '__esModule', {value: true}); | ||
var CASE_SENSITIVE_TAG_NAMES$1 = constants.CASE_SENSITIVE_TAG_NAMES; | ||
var Element$1 = node.Element; | ||
var DataNode$1 = node.DataNode; | ||
var ProcessingInstruction$1 = node.ProcessingInstruction; | ||
var caseSensitiveTagNamesMap = {}; | ||
var tagName; | ||
for (var i = 0, len = CASE_SENSITIVE_TAG_NAMES$1.length; i < len; i++) { | ||
@@ -106,87 +314,67 @@ tagName = CASE_SENSITIVE_TAG_NAMES$1[i]; | ||
* | ||
* @param {NodeList} nodes - DOM nodes. | ||
* @param {object} [parentNode] - Formatted parent node. | ||
* @param {string} [directive] - Directive. | ||
* @return {DomElement[]} - Formatted DOM object. | ||
* @param {NodeList} nodes - DOM nodes. | ||
* @param {DataNode|Element} [parentNode] - Formatted parent node. | ||
* @param {string} [directive] - Directive. | ||
* @return {Array<DataNode|Element>} - Formatted DOM object. | ||
*/ | ||
function formatDOM(nodes, parentNode, directive) { | ||
function formatDOM(domNodes, parentNode, directive) { | ||
parentNode = parentNode || null; | ||
var result = []; | ||
var domNode; | ||
var node; | ||
var prevNode; | ||
var nodeObj; | ||
var output = []; | ||
// `NodeList` is array-like | ||
for (var i = 0, len = nodes.length; i < len; i++) { | ||
node = nodes[i]; | ||
// reset | ||
nodeObj = { | ||
next: null, | ||
prev: result[i - 1] || null, | ||
parent: parentNode | ||
}; | ||
for (var i = 0, len = domNodes.length; i < len; i++) { | ||
domNode = domNodes[i]; | ||
// set the next node for the previous node (if applicable) | ||
prevNode = result[i - 1]; | ||
if (prevNode) { | ||
prevNode.next = nodeObj; | ||
} | ||
// set the node name if it's not "#text" or "#comment" | ||
// e.g., "div" | ||
if (node.nodeName[0] !== '#') { | ||
nodeObj.name = formatTagName(node.nodeName); | ||
// also, nodes of type "tag" have "attribs" | ||
nodeObj.attribs = {}; // default | ||
if (node.attributes && node.attributes.length) { | ||
nodeObj.attribs = formatAttributes(node.attributes); | ||
} | ||
} | ||
// set the node type | ||
// e.g., "tag" | ||
switch (node.nodeType) { | ||
// 1 = element | ||
// set the node data given the type | ||
switch (domNode.nodeType) { | ||
case 1: | ||
if (nodeObj.name === 'script' || nodeObj.name === 'style') { | ||
nodeObj.type = nodeObj.name; | ||
} else { | ||
nodeObj.type = 'tag'; | ||
} | ||
// recursively format the children | ||
nodeObj.children = formatDOM(node.childNodes, nodeObj); | ||
// script, style, or tag | ||
node = new Element$1( | ||
formatTagName(domNode.nodeName), | ||
formatAttributes(domNode.attributes) | ||
); | ||
node.children = formatDOM(domNode.childNodes, node); | ||
break; | ||
// 2 = attribute | ||
// 3 = text | ||
case 3: | ||
nodeObj.type = 'text'; | ||
nodeObj.data = node.nodeValue; | ||
node = new DataNode$1('text', domNode.nodeValue); | ||
break; | ||
// 8 = comment | ||
case 8: | ||
nodeObj.type = 'comment'; | ||
nodeObj.data = node.nodeValue; | ||
node = new DataNode$1('comment', domNode.nodeValue); | ||
break; | ||
} | ||
result.push(nodeObj); | ||
// set next for previous node | ||
prevNode = output[i - 1] || null; | ||
if (prevNode) { | ||
prevNode.next = node; | ||
} | ||
// set properties for current node | ||
node.parent = parentNode; | ||
node.prev = prevNode; | ||
node.next = null; | ||
output.push(node); | ||
} | ||
if (directive) { | ||
result.unshift({ | ||
name: directive.substring(0, directive.indexOf(' ')).toLowerCase(), | ||
data: directive, | ||
type: 'directive', | ||
next: result[0] ? result[0] : null, | ||
prev: null, | ||
parent: parentNode | ||
}); | ||
node = new ProcessingInstruction$1( | ||
directive.substring(0, directive.indexOf(' ')).toLowerCase(), | ||
directive | ||
); | ||
node.next = output[0] || null; | ||
node.parent = parentNode; | ||
output.unshift(node); | ||
if (result[1]) { | ||
result[1].prev = result[0]; | ||
if (output[1]) { | ||
output[1].prev = output[0]; | ||
} | ||
} | ||
return result; | ||
return output; | ||
} | ||
@@ -193,0 +381,0 @@ |
@@ -1,2 +0,2 @@ | ||
!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).HTMLDOMParser=t()}(this,(function(){"use strict";for(var e,t=["animateMotion","animateTransform","clipPath","feBlend","feColorMatrix","feComponentTransfer","feComposite","feConvolveMatrix","feDiffuseLighting","feDisplacementMap","feDropShadow","feFlood","feFuncA","feFuncB","feFuncG","feFuncR","feGaussainBlur","feImage","feMerge","feMergeNode","feMorphology","feOffset","fePointLight","feSpecularLighting","feSpotLight","feTile","feTurbulence","foreignObject","linearGradient","radialGradient","textPath"],n={},r=0,a=t.length;r<a;r++)n[(e=t[r]).toLowerCase()]=e;function o(e){for(var t,n={},r=0,a=e.length;r<a;r++)n[(t=e[r]).name]=t.value;return n}function i(e){var t=function(e){return n[e]}(e=e.toLowerCase());return t||e}var s={formatAttributes:o,formatDOM:function e(t,n,r){n=n||null;for(var a,s,u,f=[],m=0,l=t.length;m<l;m++){switch(a=t[m],u={next:null,prev:f[m-1]||null,parent:n},(s=f[m-1])&&(s.next=u),"#"!==a.nodeName[0]&&(u.name=i(a.nodeName),u.attribs={},a.attributes&&a.attributes.length&&(u.attribs=o(a.attributes))),a.nodeType){case 1:"script"===u.name||"style"===u.name?u.type=u.name:u.type="tag",u.children=e(a.childNodes,u);break;case 3:u.type="text",u.data=a.nodeValue;break;case 8:u.type="comment",u.data=a.nodeValue}f.push(u)}return r&&(f.unshift({name:r.substring(0,r.indexOf(" ")).toLowerCase(),data:r,type:"directive",next:f[0]?f[0]:null,prev:null,parent:n}),f[1]&&(f[1].prev=f[0])),f},isIE:function(e){return e?document.documentMode===e:/(MSIE |Trident\/|Edge\/)/.test(navigator.userAgent)}},u="html",f="head",m="body",l=/<([a-zA-Z]+[0-9]?)/,c=/<head.*>/i,d=/<body.*>/i,p=/<(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)(.*?)\/?>/gi,g=s.isIE(9),h=g||s.isIE(),y=function(){throw new Error("This browser does not support 
`document.implementation.createHTMLDocument`")},b=function(){throw new Error("This browser does not support `DOMParser.prototype.parseFromString`")};if("function"==typeof window.DOMParser){var T=new window.DOMParser,v=g?"text/xml":"text/html";y=b=function(e,t){return t&&(e="<"+t+">"+e+"</"+t+">"),g&&(e=e.replace(p,"<$1$2$3/>")),T.parseFromString(e,v)}}if(document.implementation){var w=document.implementation.createHTMLDocument(h?"html-dom-parser":void 0);y=function(e,t){if(t)return w.documentElement.getElementsByTagName(t)[0].innerHTML=e,w;try{return w.documentElement.innerHTML=e,w}catch(t){if(b)return b(e)}}}var M,E=document.createElement("template");E.content&&(M=function(e){return E.innerHTML=e,E.content.childNodes});var N=function(e){var t,n,r,a,o=e.match(l);switch(o&&o[1]&&(t=o[1].toLowerCase()),t){case u:return n=b(e),c.test(e)||(r=n.getElementsByTagName(f)[0])&&r.parentNode.removeChild(r),d.test(e)||(r=n.getElementsByTagName(m)[0])&&r.parentNode.removeChild(r),n.getElementsByTagName(u);case f:case m:return a=y(e).getElementsByTagName(t),d.test(e)&&c.test(e)?a[0].parentNode.childNodes:a;default:return M?M(e):y(e,m).getElementsByTagName(m)[0].childNodes}},L=s.formatDOM,x=s.isIE(9),D=/<(![a-zA-Z\s]+)>/;return function(e){if("string"!=typeof e)throw new TypeError("First argument must be a string");if(!e)return[];var t,n=e.match(D);return n&&n[1]&&(t=n[1],x&&(e=e.replace(n[0],""))),L(N(e),null,t)}})); | ||
!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).HTMLDOMParser=t()}(this,(function(){"use strict";for(var e,t,n=["animateMotion","animateTransform","clipPath","feBlend","feColorMatrix","feComponentTransfer","feComposite","feConvolveMatrix","feDiffuseLighting","feDisplacementMap","feDropShadow","feFlood","feFuncA","feFuncB","feFuncG","feFuncR","feGaussainBlur","feImage","feMerge","feMergeNode","feMorphology","feOffset","fePointLight","feSpecularLighting","feSpotLight","feTile","feTurbulence","foreignObject","linearGradient","radialGradient","textPath"],r="undefined"!=typeof globalThis?globalThis:"undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},o=r&&r.__extends||(e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)t.hasOwnProperty(n)&&(e[n]=t[n])})(t,n)},function(t,n){function r(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(r.prototype=n.prototype,new r)}),i=new Map([["tag",1],["script",1],["style",1],["directive",1],["text",3],["cdata",4],["comment",8]]),a=function(){function e(e){this.type=e,this.parent=null,this.prev=null,this.next=null,this.startIndex=null,this.endIndex=null}return Object.defineProperty(e.prototype,"nodeType",{get:function(){return i.get(this.type)||1},enumerable:!0,configurable:!0}),Object.defineProperty(e.prototype,"parentNode",{get:function(){return this.parent||null},set:function(e){this.parent=e},enumerable:!0,configurable:!0}),Object.defineProperty(e.prototype,"previousSibling",{get:function(){return this.prev||null},set:function(e){this.prev=e},enumerable:!0,configurable:!0}),Object.defineProperty(e.prototype,"nextSibling",{get:function(){return 
this.next||null},set:function(e){this.next=e},enumerable:!0,configurable:!0}),e}(),u=a,l=function(e){function t(t,n){var r=e.call(this,t)||this;return r.data=n,r}return o(t,e),Object.defineProperty(t.prototype,"nodeValue",{get:function(){return this.data},set:function(e){this.data=e},enumerable:!0,configurable:!0}),t}(a),c=l,s=function(e){function t(t,n){var r=e.call(this,"directive",n)||this;return r.name=t,r}return o(t,e),t}(l),f=function(e){function t(t,n){var r=e.call(this,t)||this;return r.children=n,r}return o(t,e),Object.defineProperty(t.prototype,"firstChild",{get:function(){return this.children[0]||null},enumerable:!0,configurable:!0}),Object.defineProperty(t.prototype,"lastChild",{get:function(){return this.children[this.children.length-1]||null},enumerable:!0,configurable:!0}),Object.defineProperty(t.prototype,"childNodes",{get:function(){return this.children},set:function(e){this.children=e},enumerable:!0,configurable:!0}),t}(a),d=f,p=function(e){function t(t,n){var r=e.call(this,"script"===t?"script":"style"===t?"style":"tag",[])||this;return r.name=t,r.attribs=n,r.attribs=n,r}return o(t,e),Object.defineProperty(t.prototype,"tagName",{get:function(){return this.name},set:function(e){this.name=e},enumerable:!0,configurable:!0}),t}(f),m=Object.defineProperty({Node:u,DataNode:c,ProcessingInstruction:s,NodeWithChildren:d,Element:p},"__esModule",{value:!0}),h=n,g=m.Element,b=m.DataNode,y=m.ProcessingInstruction,v={},w=0,T=h.length;w<T;w++)t=h[w],v[t.toLowerCase()]=t;function M(e){for(var t,n={},r=0,o=e.length;r<o;r++)n[(t=e[r]).name]=t.value;return n}function O(e){var t=function(e){return v[e]}(e=e.toLowerCase());return t||e}var N={formatAttributes:M,formatDOM:function e(t,n,r){var o,i,a;n=n||null;for(var u=[],l=0,c=t.length;l<c;l++){switch((o=t[l]).nodeType){case 1:(i=new g(O(o.nodeName),M(o.attributes))).children=e(o.childNodes,i);break;case 3:i=new b("text",o.nodeValue);break;case 8:i=new 
b("comment",o.nodeValue)}(a=u[l-1]||null)&&(a.next=i),i.parent=n,i.prev=a,i.next=null,u.push(i)}return r&&((i=new y(r.substring(0,r.indexOf(" ")).toLowerCase(),r)).next=u[0]||null,i.parent=n,u.unshift(i),u[1]&&(u[1].prev=u[0])),u},isIE:function(e){return e?document.documentMode===e:/(MSIE |Trident\/|Edge\/)/.test(navigator.userAgent)}},x="html",P="head",E="body",j=/<([a-zA-Z]+[0-9]?)/,L=/<head.*>/i,C=/<body.*>/i,D=/<(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)(.*?)\/?>/gi,_=N.isIE(9),I=_||N.isIE(),B=function(){throw new Error("This browser does not support `document.implementation.createHTMLDocument`")},F=function(){throw new Error("This browser does not support `DOMParser.prototype.parseFromString`")};if("function"==typeof window.DOMParser){var S=new window.DOMParser,A=_?"text/xml":"text/html";B=F=function(e,t){return t&&(e="<"+t+">"+e+"</"+t+">"),_&&(e=e.replace(D,"<$1$2$3/>")),S.parseFromString(e,A)}}if(document.implementation){var H=document.implementation.createHTMLDocument(I?"html-dom-parser":void 0);B=function(e,t){if(t)return H.documentElement.getElementsByTagName(t)[0].innerHTML=e,H;try{return H.documentElement.innerHTML=e,H}catch(t){if(F)return F(e)}}}var k,G=document.createElement("template");G.content&&(k=function(e){return G.innerHTML=e,G.content.childNodes});var V=function(e){var t,n,r,o,i=e.match(j);switch(i&&i[1]&&(t=i[1].toLowerCase()),t){case x:return n=F(e),L.test(e)||(r=n.getElementsByTagName(P)[0])&&r.parentNode.removeChild(r),C.test(e)||(r=n.getElementsByTagName(E)[0])&&r.parentNode.removeChild(r),n.getElementsByTagName(x);case P:case E:return o=B(e).getElementsByTagName(t),C.test(e)&&L.test(e)?o[0].parentNode.childNodes:o;default:return k?k(e):B(e,E).getElementsByTagName(E)[0].childNodes}},$=N.formatDOM,z=N.isIE(9),Z=/<(![a-zA-Z\s]+)>/;return function(e){if("string"!=typeof e)throw new TypeError("First argument must be a string");if(!e)return[];var t,n=e.match(Z);return 
n&&n[1]&&(t=n[1],z&&(e=e.replace(n[0],""))),$(V(e),null,t)}})); | ||
//# sourceMappingURL=html-dom-parser.min.js.map |
// TypeScript Version: 4.1 | ||
import { DomElement } from 'domhandler'; | ||
import { DataNode, Element } from 'domhandler'; | ||
@@ -11,2 +11,2 @@ /** | ||
*/ | ||
export default function HTMLDOMParser(html: string): DomElement[]; | ||
export default function HTMLDOMParser(html: string): Array<DataNode | Element>; |
// TypeScript Version: 4.1 | ||
import { DomHandlerOptions, DomElement } from 'domhandler'; | ||
import { DataNode, DomHandlerOptions, Element } from 'domhandler'; | ||
@@ -9,6 +9,6 @@ /** | ||
* This is the same method as `require('htmlparser2').parseDOM` | ||
* https://github.com/fb55/htmlparser2/blob/v3.9.1/lib/index.js#L39-L43 | ||
* https://github.com/fb55/htmlparser2/blob/v4.0.0/src/index.ts#L18-L22 | ||
* | ||
* @param html - HTML markup. | ||
* @param options - Parser options (https://github.com/fb55/domhandler/tree/v2.4.2#readme). | ||
* @param options - Parser options (https://github.com/fb55/domhandler/tree/v3.0.0#readme). | ||
* @return - DOM elements. | ||
@@ -19,2 +19,2 @@ */ | ||
options?: DomHandlerOptions | ||
): DomElement[]; | ||
): Array<DataNode | Element>; |
@@ -1,3 +0,3 @@ | ||
var Parser = require('htmlparser2/lib/Parser'); | ||
var DomHandler = require('domhandler'); | ||
var Parser = require('htmlparser2/lib/Parser').Parser; | ||
var DomHandler = require('domhandler').DomHandler; | ||
@@ -8,7 +8,7 @@ /** | ||
* This is the same method as `require('htmlparser2').parseDOM` | ||
* https://github.com/fb55/htmlparser2/blob/v3.9.1/lib/index.js#L39-L43 | ||
* https://github.com/fb55/htmlparser2/blob/v4.0.0/src/index.ts#L18-L22 | ||
* | ||
* @param {String} html - HTML markup. | ||
* @param {Object} [options] - Parser options (https://github.com/fb55/domhandler/tree/v2.4.2#readme). | ||
* @return {DomElement[]} - DOM elements. | ||
* @param {string} html - HTML markup. | ||
* @param {DomHandlerOptions} [options] - Parser options (https://github.com/fb55/domhandler/tree/v3.0.0#readme). | ||
* @return {Array<DataNode|Element>} - DOM elements. | ||
*/ | ||
@@ -24,3 +24,3 @@ function HTMLDOMParser(html, options) { | ||
var handler = new DomHandler(options); | ||
var handler = new DomHandler(undefined, options); | ||
new Parser(handler, options).end(html); | ||
@@ -27,0 +27,0 @@ return handler.dom; |
// TypeScript Version: 4.1 | ||
import { DomElement } from 'domhandler'; | ||
import { DataNode, Element } from 'domhandler'; | ||
@@ -25,5 +25,5 @@ /** | ||
nodes: NodeList, | ||
parentNode?: DomElement, | ||
parentNode?: DataNode | Element, | ||
directive?: string | ||
): DomElement[]; | ||
): Array<DataNode | Element>; | ||
@@ -30,0 +30,0 @@ /** |
@@ -1,5 +0,13 @@ | ||
var CASE_SENSITIVE_TAG_NAMES = require('./constants').CASE_SENSITIVE_TAG_NAMES; | ||
var constants = require('./constants'); | ||
var domhandler = require('domhandler/lib/node'); | ||
var CASE_SENSITIVE_TAG_NAMES = constants.CASE_SENSITIVE_TAG_NAMES; | ||
var Element = domhandler.Element; | ||
var DataNode = domhandler.DataNode; | ||
var ProcessingInstruction = domhandler.ProcessingInstruction; | ||
var caseSensitiveTagNamesMap = {}; | ||
var tagName; | ||
for (var i = 0, len = CASE_SENSITIVE_TAG_NAMES.length; i < len; i++) { | ||
@@ -56,87 +64,67 @@ tagName = CASE_SENSITIVE_TAG_NAMES[i]; | ||
* | ||
* @param {NodeList} nodes - DOM nodes. | ||
* @param {object} [parentNode] - Formatted parent node. | ||
* @param {string} [directive] - Directive. | ||
* @return {DomElement[]} - Formatted DOM object. | ||
* @param {NodeList} nodes - DOM nodes. | ||
* @param {DataNode|Element} [parentNode] - Formatted parent node. | ||
* @param {string} [directive] - Directive. | ||
* @return {Array<DataNode|Element>} - Formatted DOM object. | ||
*/ | ||
function formatDOM(nodes, parentNode, directive) { | ||
function formatDOM(domNodes, parentNode, directive) { | ||
parentNode = parentNode || null; | ||
var result = []; | ||
var domNode; | ||
var node; | ||
var prevNode; | ||
var nodeObj; | ||
var output = []; | ||
// `NodeList` is array-like | ||
for (var i = 0, len = nodes.length; i < len; i++) { | ||
node = nodes[i]; | ||
// reset | ||
nodeObj = { | ||
next: null, | ||
prev: result[i - 1] || null, | ||
parent: parentNode | ||
}; | ||
for (var i = 0, len = domNodes.length; i < len; i++) { | ||
domNode = domNodes[i]; | ||
// set the next node for the previous node (if applicable) | ||
prevNode = result[i - 1]; | ||
if (prevNode) { | ||
prevNode.next = nodeObj; | ||
} | ||
// set the node name if it's not "#text" or "#comment" | ||
// e.g., "div" | ||
if (node.nodeName[0] !== '#') { | ||
nodeObj.name = formatTagName(node.nodeName); | ||
// also, nodes of type "tag" have "attribs" | ||
nodeObj.attribs = {}; // default | ||
if (node.attributes && node.attributes.length) { | ||
nodeObj.attribs = formatAttributes(node.attributes); | ||
} | ||
} | ||
// set the node type | ||
// e.g., "tag" | ||
switch (node.nodeType) { | ||
// 1 = element | ||
// set the node data given the type | ||
switch (domNode.nodeType) { | ||
case 1: | ||
if (nodeObj.name === 'script' || nodeObj.name === 'style') { | ||
nodeObj.type = nodeObj.name; | ||
} else { | ||
nodeObj.type = 'tag'; | ||
} | ||
// recursively format the children | ||
nodeObj.children = formatDOM(node.childNodes, nodeObj); | ||
// script, style, or tag | ||
node = new Element( | ||
formatTagName(domNode.nodeName), | ||
formatAttributes(domNode.attributes) | ||
); | ||
node.children = formatDOM(domNode.childNodes, node); | ||
break; | ||
// 2 = attribute | ||
// 3 = text | ||
case 3: | ||
nodeObj.type = 'text'; | ||
nodeObj.data = node.nodeValue; | ||
node = new DataNode('text', domNode.nodeValue); | ||
break; | ||
// 8 = comment | ||
case 8: | ||
nodeObj.type = 'comment'; | ||
nodeObj.data = node.nodeValue; | ||
node = new DataNode('comment', domNode.nodeValue); | ||
break; | ||
} | ||
result.push(nodeObj); | ||
// set next for previous node | ||
prevNode = output[i - 1] || null; | ||
if (prevNode) { | ||
prevNode.next = node; | ||
} | ||
// set properties for current node | ||
node.parent = parentNode; | ||
node.prev = prevNode; | ||
node.next = null; | ||
output.push(node); | ||
} | ||
if (directive) { | ||
result.unshift({ | ||
name: directive.substring(0, directive.indexOf(' ')).toLowerCase(), | ||
data: directive, | ||
type: 'directive', | ||
next: result[0] ? result[0] : null, | ||
prev: null, | ||
parent: parentNode | ||
}); | ||
node = new ProcessingInstruction( | ||
directive.substring(0, directive.indexOf(' ')).toLowerCase(), | ||
directive | ||
); | ||
node.next = output[0] || null; | ||
node.parent = parentNode; | ||
output.unshift(node); | ||
if (result[1]) { | ||
result[1].prev = result[0]; | ||
if (output[1]) { | ||
output[1].prev = output[0]; | ||
} | ||
} | ||
return result; | ||
return output; | ||
} | ||
@@ -143,0 +131,0 @@ |
{ | ||
"name": "html-dom-parser", | ||
"version": "0.3.1", | ||
"version": "0.4.0", | ||
"description": "HTML to DOM parser.", | ||
@@ -37,5 +37,4 @@ "author": "Mark <mark@remarkablemark.org>", | ||
"dependencies": { | ||
"@types/domhandler": "2.4.1", | ||
"domhandler": "2.4.2", | ||
"htmlparser2": "3.10.1" | ||
"domhandler": "3.0.0", | ||
"htmlparser2": "4.0.0" | ||
}, | ||
@@ -42,0 +41,0 @@ "devDependencies": { |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
108033
2
1044
+ Added dom-serializer@1.4.1 (transitive)
+ Added domhandler@3.0.0, 4.3.1 (transitive)
+ Added domutils@2.8.0 (transitive)
+ Added htmlparser2@4.0.0 (transitive)
- Removed @types/domhandler@2.4.1
- Removed @types/domhandler@2.4.1 (transitive)
- Removed dom-serializer@0.2.2 (transitive)
- Removed domelementtype@1.3.1 (transitive)
- Removed domhandler@2.4.2 (transitive)
- Removed domutils@1.7.0 (transitive)
- Removed entities@1.1.2 (transitive)
- Removed htmlparser2@3.10.1 (transitive)
- Removed inherits@2.0.4 (transitive)
- Removed readable-stream@3.6.2 (transitive)
- Removed safe-buffer@5.2.1 (transitive)
- Removed string_decoder@1.3.0 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
Updated domhandler@3.0.0
Updated htmlparser2@4.0.0