Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

parse5

Package Overview
Dependencies
Maintainers
1
Versions
58
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

parse5 - npm Package Compare versions

Comparing version 1.5.1 to 2.0.0

lib/common/merge_options.js

137

lib/common/doctype.js

@@ -7,68 +7,68 @@ 'use strict';

QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
"+//silmaril//dtd html pro v0r11 19970101//en",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
"-//as//dtd html 3.0 aswedit + extensions//en",
"-//ietf//dtd html 2.0 level 1//en",
"-//ietf//dtd html 2.0 level 2//en",
"-//ietf//dtd html 2.0 strict level 1//en",
"-//ietf//dtd html 2.0 strict level 2//en",
"-//ietf//dtd html 2.0 strict//en",
"-//ietf//dtd html 2.0//en",
"-//ietf//dtd html 2.1e//en",
"-//ietf//dtd html 3.0//en",
"-//ietf//dtd html 3.0//en//",
"-//ietf//dtd html 3.2 final//en",
"-//ietf//dtd html 3.2//en",
"-//ietf//dtd html 3//en",
"-//ietf//dtd html level 0//en",
"-//ietf//dtd html level 0//en//2.0",
"-//ietf//dtd html level 1//en",
"-//ietf//dtd html level 1//en//2.0",
"-//ietf//dtd html level 2//en",
"-//ietf//dtd html level 2//en//2.0",
"-//ietf//dtd html level 3//en",
"-//ietf//dtd html level 3//en//3.0",
"-//ietf//dtd html strict level 0//en",
"-//ietf//dtd html strict level 0//en//2.0",
"-//ietf//dtd html strict level 1//en",
"-//ietf//dtd html strict level 1//en//2.0",
"-//ietf//dtd html strict level 2//en",
"-//ietf//dtd html strict level 2//en//2.0",
"-//ietf//dtd html strict level 3//en",
"-//ietf//dtd html strict level 3//en//3.0",
"-//ietf//dtd html strict//en",
"-//ietf//dtd html strict//en//2.0",
"-//ietf//dtd html strict//en//3.0",
"-//ietf//dtd html//en",
"-//ietf//dtd html//en//2.0",
"-//ietf//dtd html//en//3.0",
"-//metrius//dtd metrius presentational//en",
"-//microsoft//dtd internet explorer 2.0 html strict//en",
"-//microsoft//dtd internet explorer 2.0 html//en",
"-//microsoft//dtd internet explorer 2.0 tables//en",
"-//microsoft//dtd internet explorer 3.0 html strict//en",
"-//microsoft//dtd internet explorer 3.0 html//en",
"-//microsoft//dtd internet explorer 3.0 tables//en",
"-//netscape comm. corp.//dtd html//en",
"-//netscape comm. corp.//dtd strict html//en",
"-//o'reilly and associates//dtd html 2.0//en",
"-//o'reilly and associates//dtd html extended 1.0//en",
"-//spyglass//dtd html 2.0 extended//en",
"-//sq//dtd html 2.0 hotmetal + extensions//en",
"-//sun microsystems corp.//dtd hotjava html//en",
"-//sun microsystems corp.//dtd hotjava strict html//en",
"-//w3c//dtd html 3 1995-03-24//en",
"-//w3c//dtd html 3.2 draft//en",
"-//w3c//dtd html 3.2 final//en",
"-//w3c//dtd html 3.2//en",
"-//w3c//dtd html 3.2s draft//en",
"-//w3c//dtd html 4.0 frameset//en",
"-//w3c//dtd html 4.0 transitional//en",
"-//w3c//dtd html experimental 19960712//en",
"-//w3c//dtd html experimental 970421//en",
"-//w3c//dtd w3 html//en",
"-//w3o//dtd w3 html 3.0//en",
"-//w3o//dtd w3 html 3.0//en//",
"-//webtechs//dtd mozilla html 2.0//en",
"-//webtechs//dtd mozilla html//en"
'+//silmaril//dtd html pro v0r11 19970101//en',
'-//advasoft ltd//dtd html 3.0 aswedit + extensions//en',
'-//as//dtd html 3.0 aswedit + extensions//en',
'-//ietf//dtd html 2.0 level 1//en',
'-//ietf//dtd html 2.0 level 2//en',
'-//ietf//dtd html 2.0 strict level 1//en',
'-//ietf//dtd html 2.0 strict level 2//en',
'-//ietf//dtd html 2.0 strict//en',
'-//ietf//dtd html 2.0//en',
'-//ietf//dtd html 2.1e//en',
'-//ietf//dtd html 3.0//en',
'-//ietf//dtd html 3.0//en//',
'-//ietf//dtd html 3.2 final//en',
'-//ietf//dtd html 3.2//en',
'-//ietf//dtd html 3//en',
'-//ietf//dtd html level 0//en',
'-//ietf//dtd html level 0//en//2.0',
'-//ietf//dtd html level 1//en',
'-//ietf//dtd html level 1//en//2.0',
'-//ietf//dtd html level 2//en',
'-//ietf//dtd html level 2//en//2.0',
'-//ietf//dtd html level 3//en',
'-//ietf//dtd html level 3//en//3.0',
'-//ietf//dtd html strict level 0//en',
'-//ietf//dtd html strict level 0//en//2.0',
'-//ietf//dtd html strict level 1//en',
'-//ietf//dtd html strict level 1//en//2.0',
'-//ietf//dtd html strict level 2//en',
'-//ietf//dtd html strict level 2//en//2.0',
'-//ietf//dtd html strict level 3//en',
'-//ietf//dtd html strict level 3//en//3.0',
'-//ietf//dtd html strict//en',
'-//ietf//dtd html strict//en//2.0',
'-//ietf//dtd html strict//en//3.0',
'-//ietf//dtd html//en',
'-//ietf//dtd html//en//2.0',
'-//ietf//dtd html//en//3.0',
'-//metrius//dtd metrius presentational//en',
'-//microsoft//dtd internet explorer 2.0 html strict//en',
'-//microsoft//dtd internet explorer 2.0 html//en',
'-//microsoft//dtd internet explorer 2.0 tables//en',
'-//microsoft//dtd internet explorer 3.0 html strict//en',
'-//microsoft//dtd internet explorer 3.0 html//en',
'-//microsoft//dtd internet explorer 3.0 tables//en',
'-//netscape comm. corp.//dtd html//en',
'-//netscape comm. corp.//dtd strict html//en',
'-//o\'reilly and associates//dtd html 2.0//en',
'-//o\'reilly and associates//dtd html extended 1.0//en',
'-//spyglass//dtd html 2.0 extended//en',
'-//sq//dtd html 2.0 hotmetal + extensions//en',
'-//sun microsystems corp.//dtd hotjava html//en',
'-//sun microsystems corp.//dtd hotjava strict html//en',
'-//w3c//dtd html 3 1995-03-24//en',
'-//w3c//dtd html 3.2 draft//en',
'-//w3c//dtd html 3.2 final//en',
'-//w3c//dtd html 3.2//en',
'-//w3c//dtd html 3.2s draft//en',
'-//w3c//dtd html 4.0 frameset//en',
'-//w3c//dtd html 4.0 transitional//en',
'-//w3c//dtd html experimental 19960712//en',
'-//w3c//dtd html experimental 970421//en',
'-//w3c//dtd w3 html//en',
'-//w3o//dtd w3 html 3.0//en',
'-//w3o//dtd w3 html 3.0//en//',
'-//webtechs//dtd mozilla html 2.0//en',
'-//webtechs//dtd mozilla html//en'
],

@@ -123,4 +123,7 @@ QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [

exports.serializeContent = function (name, publicId, systemId) {
var str = '!DOCTYPE ' + name;
var str = '!DOCTYPE ';
if(name)
str += name;
if (publicId !== null)

@@ -127,0 +130,0 @@ str += ' PUBLIC ' + enquoteDoctypeId(publicId);

'use strict';
var Tokenizer = require('../tokenization/tokenizer'),
var Tokenizer = require('../tokenizer'),
HTML = require('./html');

@@ -28,8 +28,4 @@

'clippathunits': 'clipPathUnits',
'contentscripttype': 'contentScriptType',
'contentstyletype': 'contentStyleType',
'diffuseconstant': 'diffuseConstant',
'edgemode': 'edgeMode',
'externalresourcesrequired': 'externalResourcesRequired',
'filterres': 'filterRes',
'filterunits': 'filterUnits',

@@ -193,10 +189,7 @@ 'glyphref': 'glyphRef',

var tn = startTagToken.tagName;
var isFontWithAttrs = tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null);
if (tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null)) {
return true;
}
return EXITS_FOREIGN_CONTENT[tn];
return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn];
};

@@ -203,0 +196,0 @@

@@ -47,3 +47,2 @@ 'use strict';

COLGROUP: 'colgroup',
COMMAND: 'command',

@@ -128,4 +127,6 @@ DD: 'dd',

RB: 'rb',
RP: 'rp',
RT: 'rt',
RTC: 'rtc',
RUBY: 'ruby',

@@ -132,0 +133,0 @@

@@ -36,3 +36,2 @@ 'use strict';

LATIN_SMALL_Z: 0x7A,
BOM: 0xFEFF,
REPLACEMENT_CHARACTER: 0xFFFD

@@ -39,0 +38,0 @@ };

'use strict';
/**
* @typedef {Object} TreeAdapter
*/
//Node construction
/**
* Creates a document node.
*
* @function createDocument
* @memberof TreeAdapter
*
* @returns {ASTNode<Document>} document
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L19|default implementation.}
*/
exports.createDocument = function () {

@@ -12,2 +27,12 @@ return {

/**
* Creates a document fragment node.
*
* @function createDocumentFragment
* @memberof TreeAdapter
*
* @returns {ASTNode<DocumentFragment>} fragment
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L37|default implementation.}
*/
exports.createDocumentFragment = function () {

@@ -21,2 +46,18 @@ return {

/**
* Creates an element node.
*
* @function createElement
* @memberof TreeAdapter
*
* @param {String} tagName - Tag name of the element.
* @param {String} namespaceURI - Namespace of the element.
* @param {Array} attrs - Attribute name-value pair array.
* Foreign attributes may contain `namespace` and `prefix` fields as well.
*
* @returns {ASTNode<Element>} element
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L61|default implementation.}
*/
exports.createElement = function (tagName, namespaceURI, attrs) {

@@ -33,2 +74,15 @@ return {

/**
* Creates a comment node.
*
* @function createCommentNode
* @memberof TreeAdapter
*
* @param {String} data - Comment text.
*
* @returns {ASTNode<CommentNode>} comment
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L85|default implementation.}
*/
exports.createCommentNode = function (data) {

@@ -47,3 +101,3 @@ return {

parentNode: null
}
};
};

@@ -53,2 +107,84 @@

//Tree mutation
/**
 * Adds `newNode` to the end of the parent's `childNodes` list and points the
 * node's `parentNode` back at that parent.
 *
 * @function appendChild
 * @memberof TreeAdapter
 *
 * @param {ASTNode} parentNode - Node that receives the new child.
 * @param {ASTNode} newNode - Node to append.
 *
 * @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L114|default implementation.}
 */
var appendChild = function (parentNode, newNode) {
    var siblings = parentNode.childNodes;

    siblings.push(newNode);
    newNode.parentNode = parentNode;
};

exports.appendChild = appendChild;
/**
 * Inserts a child node to the given parent node before the given reference node.
 * If the reference node is not among the parent's children, the new node is
 * appended as the last child instead.
 *
 * @function insertBefore
 * @memberof TreeAdapter
 *
 * @param {ASTNode} parentNode - Parent node.
 * @param {ASTNode} newNode - Child node.
 * @param {ASTNode} referenceNode - Reference node.
 *
 * @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L131|default implementation.}
 */
var insertBefore = exports.insertBefore = function (parentNode, newNode, referenceNode) {
    var siblings = parentNode.childNodes,
        insertionIdx = siblings.indexOf(referenceNode);

    // indexOf yields -1 for a missing reference node; splice(-1, 0, ...) would then
    // silently place the new node before the last child, so append in that case.
    if (insertionIdx === -1) {
        siblings.push(newNode);
    }
    else {
        siblings.splice(insertionIdx, 0, newNode);
    }

    newNode.parentNode = parentNode;
};
/**
* Sets the <template> element content element.
*
* @function setTemplateContent
* @memberof TreeAdapter
*
* @param {ASTNode<TemplateElement>} templateElement - <template> element.
* @param {ASTNode<DocumentFragment>} contentTemplate - Content element.
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L149|default implementation.}
*/
exports.setTemplateContent = function (templateElement, contentElement) {
templateElement.content = contentElement;
};
/**
* Returns the <template> element content element.
*
* @function getTemplateContent
* @memberof TreeAdapter
*
* @param {ASTNode<DocumentFragment>} templateElement - <template> element.
* @returns {Boolean}
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L166|default implementation.}
*/
exports.getTemplateContent = function (templateElement) {
return templateElement.content;
};
/**
* Sets the document type. If the `document` already contains a document type node, the `name`, `publicId` and `systemId`
* properties of this node will be updated with the provided values. Otherwise, creates a new document type node
* with the given properties and inserts it into the `document`.
*
* @function setDocumentType
* @memberof TreeAdapter
*
* @param {ASTNode<Document>} document - Document node.
* @param {String} name - Document type name.
* @param {String} publicId - Document type public identifier.
* @param {String} systemId - Document type system identifier.
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L185|default implementation.}
*/
exports.setDocumentType = function (document, name, publicId, systemId) {

@@ -80,2 +216,12 @@ var doctypeNode = null;

/**
* Sets the document's quirks mode flag.
*
* @function setQuirksMode
* @memberof TreeAdapter
*
* @param {ASTNode<Document>} document - Document node.
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L221|default implementation.}
*/
exports.setQuirksMode = function (document) {

@@ -85,2 +231,14 @@ document.quirksMode = true;

/**
* Determines if the document's quirks mode flag is set.
*
* @function isQuirksMode
* @memberof TreeAdapter
*
* @param {ASTNode<Document>} document - Document node.
* @returns {Boolean}
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L237|default implementation.}
*/
exports.isQuirksMode = function (document) {

@@ -90,14 +248,12 @@ return document.quirksMode;

// Pushes `newNode` onto the parent's `childNodes` and records the parent on the node.
var appendChild = function (parentNode, newNode) {
    var siblings = parentNode.childNodes;

    siblings.push(newNode);
    newNode.parentNode = parentNode;
};

exports.appendChild = appendChild;
/**
* Removes a node from its parent.
*
* @function detachNode
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
var insertBefore = exports.insertBefore = function (parentNode, newNode, referenceNode) {
var insertionIdx = parentNode.childNodes.indexOf(referenceNode);
parentNode.childNodes.splice(insertionIdx, 0, newNode);
newNode.parentNode = parentNode;
};
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L251|default implementation.}
*/
exports.detachNode = function (node) {

@@ -112,2 +268,15 @@ if (node.parentNode) {

/**
* Inserts text into a node. If the last child of the node is a text node, the provided text will be appended to the
* text node content. Otherwise, inserts a new text node with the given text.
*
*
* @function insertText
* @memberof TreeAdapter
*
* @param {ASTNode} parentNode - Node to insert text into.
* @param {String} text - Text to insert.
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L273|default implementation.}
*/
exports.insertText = function (parentNode, text) {

@@ -126,2 +295,17 @@ if (parentNode.childNodes.length) {

/**
* Inserts text into a sibling node that goes before the reference node. If this sibling node is the text node,
* the provided text will be appended to the text node content. Otherwise, inserts a new sibling text node with
* the given text before the reference node.
*
*
* @function insertTextBefore
* @memberof TreeAdapter
*
* @param {ASTNode} parentNode - Node to insert text into.
* @param {String} text - Text to insert.
* @param {ASTNode} referenceNode - Node to insert text before.
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L301|default implementation.}
*/
exports.insertTextBefore = function (parentNode, text, referenceNode) {

@@ -136,2 +320,13 @@ var prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];

/**
* Copies attributes to the given node. Only attributes that are not yet present in the node are copied.
*
* @function adoptAttributes
* @memberof TreeAdapter
*
* @param {ASTNode} recipientNode - Node to copy attributes into.
* @param {Array} attrs - Attributes to copy.
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L321|default implementation.}
*/
exports.adoptAttributes = function (recipientNode, attrs) {

@@ -151,2 +346,15 @@ var recipientAttrsMap = [];

//Tree traversing
/**
* Returns the first child of the given node.
*
* @function getFirstChild
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {ASTNode} firstChild
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L348|default implementation.}
*/
exports.getFirstChild = function (node) {

@@ -156,2 +364,14 @@ return node.childNodes[0];

/**
* Returns the given node's children in an array.
*
* @function getChildNodes
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {Array} children
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L364|default implementation.}
*/
exports.getChildNodes = function (node) {

@@ -161,2 +381,14 @@ return node.childNodes;

/**
* Returns the given node's parent.
*
* @function getParentNode
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {ASTNode} parent
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L380|default implementation.}
*/
exports.getParentNode = function (node) {

@@ -166,2 +398,15 @@ return node.parentNode;

/**
* Returns the given node's attributes in an array, in the form of name-value pairs.
* Foreign attributes may contain `namespace` and `prefix` fields as well.
*
* @function getAttrList
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {Array} attributes
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L397|default implementation.}
*/
exports.getAttrList = function (node) {

@@ -172,2 +417,15 @@ return node.attrs;

//Node data
/**
* Returns the given element's tag name.
*
* @function getTagName
* @memberof TreeAdapter
*
* @param {ASTNode<Element>} element - Element.
*
* @returns {String} tagName
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L415|default implementation.}
*/
exports.getTagName = function (element) {

@@ -177,2 +435,14 @@ return element.tagName;

/**
* Returns the given element's namespace.
*
* @function getNamespaceURI
* @memberof TreeAdapter
*
* @param {ASTNode<Element>} element - Element.
*
* @returns {String} namespaceURI
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L431|default implementation.}
*/
exports.getNamespaceURI = function (element) {

@@ -182,2 +452,14 @@ return element.namespaceURI;

/**
* Returns the given text node's content.
*
* @function getTextNodeContent
* @memberof TreeAdapter
*
* @param {ASTNode<Text>} textNode - Text node.
*
* @returns {String} text
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L447|default implementation.}
*/
exports.getTextNodeContent = function (textNode) {

@@ -187,2 +469,14 @@ return textNode.value;

/**
* Returns the given comment node's content.
*
* @function getCommentNodeContent
* @memberof TreeAdapter
*
* @param {ASTNode<Comment>} commentNode - Comment node.
*
* @returns {String} commentText
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L463|default implementation.}
*/
exports.getCommentNodeContent = function (commentNode) {

@@ -192,2 +486,14 @@ return commentNode.data;

/**
* Returns the given document type node's name.
*
* @function getDocumentTypeNodeName
* @memberof TreeAdapter
*
* @param {ASTNode<DocumentType>} doctypeNode - Document type node.
*
* @returns {String} name
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L479|default implementation.}
*/
exports.getDocumentTypeNodeName = function (doctypeNode) {

@@ -197,2 +503,14 @@ return doctypeNode.name;

/**
* Returns the given document type node's public identifier.
*
* @function getDocumentTypeNodePublicId
* @memberof TreeAdapter
*
* @param {ASTNode<DocumentType>} doctypeNode - Document type node.
*
* @returns {String} publicId
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L495|default implementation.}
*/
exports.getDocumentTypeNodePublicId = function (doctypeNode) {

@@ -202,2 +520,14 @@ return doctypeNode.publicId;

/**
* Returns the given document type node's system identifier.
*
* @function getDocumentTypeNodeSystemId
* @memberof TreeAdapter
*
* @param {ASTNode<DocumentType>} doctypeNode - Document type node.
*
* @returns {String} systemId
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L511|default implementation.}
*/
exports.getDocumentTypeNodeSystemId = function (doctypeNode) {

@@ -208,2 +538,14 @@ return doctypeNode.systemId;

//Node types
/**
* Determines if the given node is a text node.
*
* @function isTextNode
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {Boolean}
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L526|default implementation.}
*/
exports.isTextNode = function (node) {

@@ -213,2 +555,14 @@ return node.nodeName === '#text';

/**
* Determines if the given node is a comment node.
*
* @function isCommentNode
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {Boolean}
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L544|default implementation.}
*/
exports.isCommentNode = function (node) {

@@ -218,2 +572,14 @@ return node.nodeName === '#comment';

/**
* Determines if the given node is a document type node.
*
* @function isDocumentTypeNode
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {Boolean}
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L560|default implementation.}
*/
exports.isDocumentTypeNode = function (node) {

@@ -223,4 +589,16 @@ return node.nodeName === '#documentType';

/**
* Determines if the given node is an element.
*
* @function isElementNode
* @memberof TreeAdapter
*
* @param {ASTNode} node - Node.
*
* @returns {Boolean}
*
* @see {@link https://github.com/inikulin/parse5/blob/tree-adapter-docs-rev/lib/tree_adapters/default.js#L576|default implementation.}
*/
exports.isElementNode = function (node) {
return !!node.tagName;
};
'use strict';
var Doctype = require('../common/doctype');
var doctype = require('../common/doctype');

@@ -33,2 +33,3 @@ //Conversion tables for DOM Level1 structure emulation

var children = this.children;
return children && children[0] || null;

@@ -39,2 +40,3 @@ },

var children = this.children;
return children && children[children.length - 1] || null;

@@ -65,12 +67,12 @@ },

exports.createDocument =
exports.createDocumentFragment = function () {
return new Node({
type: 'root',
name: 'root',
parent: null,
prev: null,
next: null,
children: []
});
};
exports.createDocumentFragment = function () {
return new Node({
type: 'root',
name: 'root',
parent: null,
prev: null,
next: null,
children: []
});
};

@@ -126,4 +128,40 @@ exports.createElement = function (tagName, namespaceURI, attrs) {

//Tree mutation
/**
 * Appends `newNode` as the last child of `parentNode`. When the parent already
 * has children, the previous last child and the new node are linked to each
 * other through their `next` / `prev` fields.
 *
 * @param {Object} parentNode - Node whose `children` list receives the new node.
 * @param {Object} newNode - Node to append.
 */
var appendChild = function (parentNode, newNode) {
    var siblings = parentNode.children,
        lastSibling = siblings[siblings.length - 1];

    if (lastSibling) {
        lastSibling.next = newNode;
        newNode.prev = lastSibling;
    }

    siblings.push(newNode);
    newNode.parent = parentNode;
};

exports.appendChild = appendChild;
/**
 * Inserts `newNode` into `parentNode.children` at the position of
 * `referenceNode` and rewires the `prev` / `next` links of the reference node
 * and of the sibling that used to precede it.
 *
 * @param {Object} parentNode - Node whose `children` list is modified.
 * @param {Object} newNode - Node to insert.
 * @param {Object} referenceNode - Existing child the new node is placed before.
 */
var insertBefore = function (parentNode, newNode, referenceNode) {
    var siblings = parentNode.children,
        targetIdx = siblings.indexOf(referenceNode),
        precedingNode = referenceNode.prev;

    if (precedingNode) {
        precedingNode.next = newNode;
        newNode.prev = precedingNode;
    }

    referenceNode.prev = newNode;
    newNode.next = referenceNode;

    siblings.splice(targetIdx, 0, newNode);
    newNode.parent = parentNode;
};

exports.insertBefore = insertBefore;
exports.setTemplateContent = function (templateElement, contentElement) {
appendChild(templateElement, contentElement);
};
exports.getTemplateContent = function (templateElement) {
return templateElement.children[0];
};
exports.setDocumentType = function (document, name, publicId, systemId) {
var data = Doctype.serializeContent(name, publicId, systemId),
var data = doctype.serializeContent(name, publicId, systemId),
doctypeNode = null;

@@ -166,30 +204,2 @@

// Appends `newNode` to `parentNode.children`; if a last child already exists it
// is linked to the new node via `next` / `prev`.
var appendChild = function (parentNode, newNode) {
    var siblings = parentNode.children,
        lastSibling = siblings[siblings.length - 1];

    if (lastSibling) {
        lastSibling.next = newNode;
        newNode.prev = lastSibling;
    }

    siblings.push(newNode);
    newNode.parent = parentNode;
};

exports.appendChild = appendChild;
// Places `newNode` at the position of `referenceNode` inside `parentNode.children`
// and rewires the neighbouring `prev` / `next` links accordingly.
var insertBefore = function (parentNode, newNode, referenceNode) {
    var siblings = parentNode.children,
        targetIdx = siblings.indexOf(referenceNode),
        precedingNode = referenceNode.prev;

    if (precedingNode) {
        precedingNode.next = newNode;
        newNode.prev = precedingNode;
    }

    referenceNode.prev = newNode;
    newNode.next = referenceNode;

    siblings.splice(targetIdx, 0, newNode);
    newNode.parent = parentNode;
};

exports.insertBefore = insertBefore;
exports.detachNode = function (node) {

@@ -196,0 +206,0 @@ if (node.parent) {

{
"name": "parse5",
"description": "WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node and io.js.",
"version": "1.5.1",
"version": "2.0.0",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",

@@ -12,5 +12,17 @@ "contributors": [

],
"homepage": "http://inikulin.github.io/parse5/",
"homepage": "https://github.com/inikulin/parse5",
"devDependencies": {
"mocha": "1.21.4"
"del": "^2.0.2",
"gulp": "^3.9.0",
"gulp-benchmark": "^1.1.1",
"gulp-concat": "^2.6.0",
"gulp-download": "0.0.1",
"gulp-eslint": "^1.0.0",
"gulp-insert": "^0.5.0",
"gulp-install": "^0.5.0",
"gulp-jsdoc-to-markdown": "^1.1.1",
"gulp-mocha": "^2.1.3",
"gulp-rename": "^1.2.2",
"promise": "^7.0.4",
"through2": "^2.0.0"
},

@@ -34,9 +46,4 @@ "keywords": [

],
"licenses": [
{
"type": "MIT",
"url": "https://raw.github.com/inikulin/parse5/master/LICENSE"
}
],
"main": "./index.js",
"license": "MIT",
"main": "./lib/index.js",
"repository": {

@@ -47,4 +54,7 @@ "type": "git",

"scripts": {
"test": "node test/run_tests.js"
}
"test": "gulp test"
},
"files": [
"lib"
]
}
<p align="center">
<img src="https://raw.github.com/inikulin/parse5/master/logo.png" alt="parse5" />
<a href="https://github.com/inikulin/parse5">
<img src="https://raw.github.com/inikulin/parse5/master/docs/logo.png" alt="parse5" />
</a>
</p>
[![Build Status](https://api.travis-ci.org/inikulin/parse5.svg)](https://travis-ci.org/inikulin/parse5)
[![npm](https://img.shields.io/npm/v/parse5.svg)](https://www.npmjs.com/package/parse5)
<p align="center">
<i>WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node.js</i>
</p>
*WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node and io.js.*
<p align="center">
<a href="https://travis-ci.org/inikulin/parse5"><img alt="Build Status" src="https://api.travis-ci.org/inikulin/parse5.svg"></a>
<a href="https://www.npmjs.com/package/parse5"><img alt="NPM Version" src="https://img.shields.io/npm/v/parse5.svg"></a>
<a href="https://npmjs.org/package/parse5"><img alt="Downloads" src="http://img.shields.io/npm/dm/parse5.svg"></a>
<a href="https://npmjs.org/package/parse5"><img alt="Downloads total" src="http://img.shields.io/npm/dt/parse5.svg"></a>
</p>
I needed a fast, production-ready HTML parser that parses HTML the way a modern browser's parser does.
Existing solutions were either too slow or their output was too inaccurate. So, this is how parse5 was born.
<p align="center">
<b><i>parse5</i></b> provides nearly everything you may need when dealing with HTML. It's the fastest spec-compliant HTML parser
for Node to date. It parses HTML the way the latest version of your browser does. It has proven itself reliable in such projects
as <a href="https://github.com/tmpvar/jsdom">jsdom</a>, <a href="https://github.com/angular/angular">Angular2</a>, <a href="https://www.polymer-project.org">Polymer</a> and many more.
</p>
**Included tools:**
* [Parser](#class-parser) - HTML to DOM-tree parser.
* [SimpleApiParser](#class-simpleapiparser) - [SAX](http://en.wikipedia.org/wiki/Simple_API_for_XML)-style parser for HTML.
* [Serializer](#class-serializer) - DOM-tree to HTML code serializer.
----
## Install
```
$ npm install parse5
```
<p align="center">
<a href="https://github.com/inikulin/parse5/wiki/Documentation">Documentation</a>
</p>
## Usage
```js
var Parser = require('parse5').Parser;
//Instantiate parser
var parser = new Parser();
//Then feed it with an HTML document
var document = parser.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>')
//Now let's parse HTML-snippet
var fragment = parser.parseFragment('<title>Parse5 is &#102;&#117;&#99;&#107;ing awesome!</title><h1>42</h1>');
```
## Is it fast?
Check out [this benchmark](https://github.com/inikulin/node-html-parser-bench).
```
Starting benchmark. Fasten your seatbelts...
html5 (https://github.com/aredridel/html5) x 0.18 ops/sec ±5.92% (5 runs sampled)
htmlparser (https://github.com/tautologistics/node-htmlparser/) x 3.83 ops/sec ±42.43% (14 runs sampled)
htmlparser2 (https://github.com/fb55/htmlparser2) x 4.05 ops/sec ±39.27% (15 runs sampled)
parse5 (https://github.com/inikulin/parse5) x 3.04 ops/sec ±51.81% (13 runs sampled)
Fastest is htmlparser2 (https://github.com/fb55/htmlparser2),parse5 (https://github.com/inikulin/parse5)
```
So, parse5 is as fast as simple, specification-incompatible parsers and ~15 times(!) faster than the specification-compatible parser currently available for Node.
## API reference
### Enum: TreeAdapters
Provides built-in tree adapters which can be passed as an optional argument to the `Parser` and `Serializer` constructors.
#### &bull; TreeAdapters.default
Default tree format for parse5.
#### &bull; TreeAdapters.htmlparser2
Quite popular [htmlparser2](https://github.com/fb55/htmlparser2) tree format (e.g. used in [cheerio](https://github.com/MatthewMueller/cheerio) and [jsdom](https://github.com/tmpvar/jsdom)).
---------------------------------------
### Class: Parser
Provides HTML parsing functionality.
#### &bull; Parser.ctor([treeAdapter, options])
Creates new reusable instance of the `Parser`. Optional `treeAdapter` argument specifies resulting tree format. If `treeAdapter` argument is not specified, `default` tree adapter will be used.
`options` object provides the parsing algorithm modifications:
##### options.decodeHtmlEntities
Decode HTML entities like `&amp;`, `&nbsp;`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification.
##### options.locationInfo
Enables source code location information for the nodes. Default: `false`. When enabled, each node (except the root node) has a `__location` property, which contains the `start` and `end` indices of the node in the source code. If an element was implicitly created by the parser, its `__location` property will be `null`. If the node is not an empty element, `__location` has two additional properties, `startTag` and `endTag`, which contain location information for the individual tags in a fashion similar to the `__location` property.
*Example:*
```js
var parse5 = require('parse5');
//Instantiate new parser with default tree adapter
var parser1 = new parse5.Parser();
//Instantiate new parser with htmlparser2 tree adapter
var parser2 = new parse5.Parser(parse5.TreeAdapters.htmlparser2);
```
#### &bull; Parser.parse(html)
Parses specified `html` string. Returns `document` node.
*Example:*
```js
var document = parser.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
```
#### &bull; Parser.parseFragment(htmlFragment, [contextElement])
Parses given `htmlFragment`. Returns `documentFragment` node. Optional `contextElement` argument specifies context in which given `htmlFragment` will be parsed (consider it as setting `contextElement.innerHTML` property). If `contextElement` argument is not specified then `<template>` element will be used as a context and fragment will be parsed in 'forgiving' manner.
*Example:*
```js
var documentFragment = parser.parseFragment('<table></table>');
//Parse html fragment in context of the parsed <table> element
var trFragment = parser.parseFragment('<tr><td>Shake it, baby</td></tr>', documentFragment.childNodes[0]);
```
---------------------------------------
### Class: SimpleApiParser
Provides [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML parsing functionality.
#### &bull; SimpleApiParser.ctor(handlers, [options])
Creates new reusable instance of the `SimpleApiParser`. `handlers` argument specifies object that contains parser's event handlers. Possible events and their signatures are shown in the example.
`options` object provides the parsing algorithm modifications:
##### options.decodeHtmlEntities
Decode HTML-entities like `&amp;`, `&nbsp;`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification.
##### options.locationInfo
Enables source code location information for the tokens. Default: `false`. When enabled, each node handler receives a `location` object as its last argument. The `location` object contains the `start` and `end` indices of the token in the source code.
*Example:*
```js
var parse5 = require('parse5');
var parser = new parse5.SimpleApiParser({
doctype: function(name, publicId, systemId /*, [location] */) {
//Handle doctype here
},
startTag: function(tagName, attrs, selfClosing /*, [location] */) {
//Handle start tags here
},
endTag: function(tagName /*, [location] */) {
//Handle end tags here
},
text: function(text /*, [location] */) {
//Handle texts here
},
comment: function(text /*, [location] */) {
//Handle comments here
}
});
```
#### &bull; SimpleApiParser.parse(html)
Raises parser events for the given `html`.
*Example:*
```js
var parse5 = require('parse5');
var parser = new parse5.SimpleApiParser({
text: function(text) {
console.log(text);
}
});
parser.parse('<body>Yo!</body>');
```
---------------------------------------
### Class: Serializer
Provides tree-to-HTML serialization functionality.
**Note:** prior to v1.2.0 this class was called `TreeSerializer`. However, it's still accessible as `parse5.TreeSerializer` for backward compatibility.
#### &bull; Serializer.ctor([treeAdapter, options])
Creates new reusable instance of the `Serializer`. Optional `treeAdapter` argument specifies input tree format. If `treeAdapter` argument is not specified, `default` tree adapter will be used.
`options` object provides the serialization algorithm modifications:
##### options.encodeHtmlEntities
HTML-encode characters like `<`, `>`, `&`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification.
*Example:*
```js
var parse5 = require('parse5');
//Instantiate new serializer with default tree adapter
var serializer1 = new parse5.Serializer();
//Instantiate new serializer with htmlparser2 tree adapter
var serializer2 = new parse5.Serializer(parse5.TreeAdapters.htmlparser2);
```
#### &bull; Serializer.serialize(node)
Serializes the given `node`. Returns HTML string.
*Example:*
```js
var document = parser.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
//Serialize document
var html = serializer.serialize(document);
//Serialize <body> element content
var bodyInnerHtml = serializer.serialize(document.childNodes[0].childNodes[1]);
```
---------------------------------------
## Testing
Test data is adopted from [html5lib project](https://github.com/html5lib). Parser is covered by more than 8000 test cases.
To run tests:
```
$ npm test
```
## Custom tree adapter
You can create a custom tree adapter so parse5 can work with your own DOM-tree implementation.
Just pass your adapter implementation to the parser's constructor as an argument:
```js
var Parser = require('parse5').Parser;
var myTreeAdapter = {
//Adapter methods...
};
//Instantiate parser
var parser = new Parser(myTreeAdapter);
```
Sample implementation can be found [here](https://github.com/inikulin/parse5/blob/master/lib/tree_adapters/default.js).
The custom tree adapter should implement all methods exposed via `exports` in the sample implementation.
## Questions or suggestions?
If you have any questions, please feel free to create an issue [here on github](https://github.com/inikulin/parse5/issues).
## Author
[Ivan Nikulin](https://github.com/inikulin) (ifaaan@gmail.com)
<p align="center">
<a href="https://github.com/inikulin/parse5/issues">Issue tracker</a>
</p>

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc