Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@markuplint/html-parser

Package Overview
Dependencies
Maintainers
1
Versions
170
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@markuplint/html-parser - npm Package Compare versions

Comparing version 1.0.0-alpha.13 to 1.0.0-alpha.14

lib/flatten-nodes.d.ts

60

lib/attr-tokenizer.js

@@ -6,51 +6,51 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
var uuid_1 = __importDefault(require("uuid"));
var tokenizer_1 = __importDefault(require("./tokenizer"));
const uuid_1 = __importDefault(require("uuid"));
const tokenizer_1 = __importDefault(require("./tokenizer"));
// eslint-disable-next-line no-control-regex
var reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
const reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
function attrTokenizer(raw, line, col, startOffset) {
var attrMatchedMap = raw.match(reAttrsInStartTag);
const attrMatchedMap = raw.match(reAttrsInStartTag);
if (!attrMatchedMap) {
throw new SyntaxError('Illegal attribute token');
}
var spacesBeforeAttrString = attrMatchedMap[1];
var nameChars = attrMatchedMap[2];
var spacesBeforeEqualChars = attrMatchedMap[3] || '';
var equalChars = attrMatchedMap[4] || null;
var spacesAfterEqualChars = attrMatchedMap[5] || '';
var quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
var valueChars = attrMatchedMap[6] || attrMatchedMap[7] || attrMatchedMap[8] || (quoteChars ? '' : null);
var invalid = !!(valueChars && quoteChars === null && /["'=<>`]/.test(valueChars)) ||
const spacesBeforeAttrString = attrMatchedMap[1];
const nameChars = attrMatchedMap[2];
const spacesBeforeEqualChars = attrMatchedMap[3] || '';
const equalChars = attrMatchedMap[4] || null;
const spacesAfterEqualChars = attrMatchedMap[5] || '';
const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
const valueChars = attrMatchedMap[6] || attrMatchedMap[7] || attrMatchedMap[8] || (quoteChars ? '' : null);
const invalid = !!(valueChars && quoteChars === null && /["'=<>`]/.test(valueChars)) ||
!!(equalChars && quoteChars === null && valueChars === null);
var offset = startOffset;
var attrToken = tokenizer_1.default(raw, line, col, offset);
var spacesBeforeName = tokenizer_1.default(spacesBeforeAttrString, line, col, offset);
let offset = startOffset;
const attrToken = tokenizer_1.default(raw, line, col, offset);
const spacesBeforeName = tokenizer_1.default(spacesBeforeAttrString, line, col, offset);
line = spacesBeforeName.endLine;
col = spacesBeforeName.endCol;
offset = spacesBeforeName.endOffset;
var name = tokenizer_1.default(nameChars, line, col, offset);
const name = tokenizer_1.default(nameChars, line, col, offset);
line = name.endLine;
col = name.endCol;
offset = name.endOffset;
var spacesBeforeEqual = tokenizer_1.default(spacesBeforeEqualChars, line, col, offset);
const spacesBeforeEqual = tokenizer_1.default(spacesBeforeEqualChars, line, col, offset);
line = spacesBeforeEqual.endLine;
col = spacesBeforeEqual.endCol;
offset = spacesBeforeEqual.endOffset;
var equal = tokenizer_1.default(equalChars, line, col, offset);
const equal = tokenizer_1.default(equalChars, line, col, offset);
line = equal.endLine;
col = equal.endCol;
offset = equal.endOffset;
var spacesAfterEqual = tokenizer_1.default(spacesAfterEqualChars, line, col, offset);
const spacesAfterEqual = tokenizer_1.default(spacesAfterEqualChars, line, col, offset);
line = spacesAfterEqual.endLine;
col = spacesAfterEqual.endCol;
offset = spacesAfterEqual.endOffset;
var startQuote = tokenizer_1.default(quoteChars, line, col, offset);
const startQuote = tokenizer_1.default(quoteChars, line, col, offset);
line = startQuote.endLine;
col = startQuote.endCol;
offset = startQuote.endOffset;
var value = tokenizer_1.default(valueChars, line, col, offset);
const value = tokenizer_1.default(valueChars, line, col, offset);
line = value.endLine;
col = value.endCol;
offset = value.endOffset;
var endQuote = tokenizer_1.default(quoteChars, line, col, offset);
const endQuote = tokenizer_1.default(quoteChars, line, col, offset);
line = endQuote.endLine;

@@ -68,10 +68,10 @@ col = endQuote.endCol;

endCol: attrToken.endCol,
spacesBeforeName: spacesBeforeName,
name: name,
spacesBeforeEqual: spacesBeforeEqual,
equal: equal,
spacesAfterEqual: spacesAfterEqual,
startQuote: startQuote,
value: value,
endQuote: endQuote,
spacesBeforeName,
name,
spacesBeforeEqual,
equal,
spacesAfterEqual,
startQuote,
value,
endQuote,
isInvalid: invalid,

@@ -78,0 +78,0 @@ };

@@ -21,3 +21,3 @@ "use strict";

*/
var rePCENChar = [
const rePCENChar = [
'\\-',

@@ -42,3 +42,3 @@ '\\.',

].join('|');
exports.rePCEN = new RegExp("^[a-z](?:" + rePCENChar + ")*\\-(?:" + rePCENChar + ")*$", 'i');
exports.rePCEN = new RegExp(`^[a-z](?:${rePCENChar})*\\-(?:${rePCENChar})*$`, 'i');
exports.reSplitterTag = /<[^>]+>/g;
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
function getEndCol(html, col) {
var lines = html.split(/\r?\n/);
var lineCount = lines.length;
var lastLine = lines.pop();
const lines = html.split(/\r?\n/);
const lineCount = lines.length;
const lastLine = lines.pop();
return lineCount > 1 ? lastLine.length + 1 : col + html.length;
}
exports.default = getEndCol;
export { default as isDocumentFragment } from './is-document-fragment';
export { default as nodeListToDebugMaps } from './node-list-to-debug-maps';
export { default as parse } from './parse';
export { flattenNodes } from './flatten-nodes';

@@ -9,1 +9,3 @@ "use strict";

exports.parse = parse_1.default;
var flatten_nodes_1 = require("./flatten-nodes");
exports.flattenNodes = flatten_nodes_1.flattenNodes;
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var const_1 = require("./const");
const const_1 = require("./const");
/**

@@ -5,0 +5,0 @@ * valid name of custom element

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
function default_1(nodeList) {
return nodeList.map(function (n) {
return nodeList.map(n => {
if (!n.isGhost) {
return "[" + n.startLine + ":" + n.startCol + "]>[" + n.endLine + ":" + n.endCol + "](" + n.startOffset + "," + n.endOffset + ")" + n.nodeName + ": " + visibleWhiteSpace(n.raw);
return `[${n.startLine}:${n.startCol}]>[${n.endLine}:${n.endCol}](${n.startOffset},${n.endOffset})${n.nodeName}: ${visibleWhiteSpace(n.raw)}`;
}
else {
return "[N/A]>[N/A](N/A)" + n.nodeName + ": " + visibleWhiteSpace(n.raw);
return `[N/A]>[N/A](N/A)${n.nodeName}: ${visibleWhiteSpace(n.raw)}`;
}

@@ -11,0 +11,0 @@ });

@@ -6,34 +6,34 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
var const_1 = require("./const");
var attr_tokenizer_1 = __importDefault(require("./attr-tokenizer"));
var tokenizer_1 = __importDefault(require("./tokenizer"));
const const_1 = require("./const");
const attr_tokenizer_1 = __importDefault(require("./attr-tokenizer"));
const tokenizer_1 = __importDefault(require("./tokenizer"));
// eslint-disable-next-line no-control-regex
var reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
var reEndTokens = /(\s*\/)?(\s*)>$/;
const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
const reEndTokens = /(\s*\/)?(\s*)>$/;
function parseRawTag(raw, nodeLine, nodeCol, startOffset) {
var line = nodeLine;
var col = nodeCol;
var offset = startOffset;
var matches = raw.match(const_1.reTag);
let line = nodeLine;
let col = nodeCol;
let offset = startOffset;
const matches = raw.match(const_1.reTag);
if (!matches) {
throw new SyntaxError("Invalid tag syntax: " + raw);
throw new SyntaxError(`Invalid tag syntax: ${raw}`);
}
var tagWithAttrs = matches[1];
const tagWithAttrs = matches[1];
// eslint-disable-next-line no-control-regex
var tagNameSplited = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
var tagName = tagNameSplited[0] || tagNameSplited[1];
const tagNameSplited = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
const tagName = tagNameSplited[0] || tagNameSplited[1];
if (!tagName || (!const_1.reTagName.test(tagName) && !const_1.rePCEN.test(tagName))) {
throw new SyntaxError("Invalid tag name: \"" + tagName + "\" in <" + tagWithAttrs + ">");
throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
}
var tagStartPos = tagWithAttrs.indexOf(tagName);
var rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length);
const tagStartPos = tagWithAttrs.indexOf(tagName);
let rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length);
// console.log({ raw, tagStartPos, tagName, rawAttrs });
col += tagName.length + 1 + tagStartPos;
offset += tagName.length + 1 + tagStartPos;
var attrs = [];
const attrs = [];
while (reAttrsInStartTag.test(rawAttrs)) {
var attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
const attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
if (attrMatchedMap && attrMatchedMap[0]) {
var rawAttr = attrMatchedMap[0];
var attr = attr_tokenizer_1.default(rawAttr, line, col, offset);
const rawAttr = attrMatchedMap[0];
const attr = attr_tokenizer_1.default(rawAttr, line, col, offset);
line = attr.endLine;

@@ -46,15 +46,15 @@ col = attr.endCol;

}
var endTokens = reEndTokens.exec(raw);
var selfClosingSolidus = tokenizer_1.default(endTokens && endTokens[1], line, col, offset);
const endTokens = reEndTokens.exec(raw);
const selfClosingSolidus = tokenizer_1.default(endTokens && endTokens[1], line, col, offset);
line = selfClosingSolidus.endLine;
col = selfClosingSolidus.endCol;
offset = selfClosingSolidus.endOffset;
var endSpace = tokenizer_1.default(endTokens && endTokens[2], line, col, offset);
const endSpace = tokenizer_1.default(endTokens && endTokens[2], line, col, offset);
return {
tagName: tagName,
attrs: attrs,
selfClosingSolidus: selfClosingSolidus,
endSpace: endSpace,
tagName,
attrs,
selfClosingSolidus,
endSpace,
};
}
exports.default = parseRawTag;

@@ -1,4 +0,2 @@

import { MLASTDocument, MLASTNode } from '@markuplint/ml-ast';
import { MLASTDocument } from '@markuplint/ml-ast';
export default function parse(html: string): MLASTDocument;
export declare type Walker = (node: MLASTNode) => void;
export declare function walk(nodeList: MLASTNode[], walker: Walker): void;

@@ -6,32 +6,48 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
var ml_ast_1 = require("@markuplint/ml-ast");
var uuid_1 = __importDefault(require("uuid"));
var get_end_col_1 = __importDefault(require("./get-end-col"));
var get_end_line_1 = __importDefault(require("./get-end-line"));
var is_document_fragment_1 = __importDefault(require("./is-document-fragment"));
var parse5_1 = __importDefault(require("parse5"));
var parse_raw_tag_1 = __importDefault(require("./parse-raw-tag"));
var tag_splitter_1 = __importDefault(require("./tag-splitter"));
var P5_OPTIONS = { sourceCodeLocationInfo: true };
const ml_ast_1 = require("@markuplint/ml-ast");
const uuid_1 = __importDefault(require("uuid"));
const flatten_nodes_1 = require("./flatten-nodes");
const get_end_col_1 = __importDefault(require("./get-end-col"));
const get_end_line_1 = __importDefault(require("./get-end-line"));
const is_document_fragment_1 = __importDefault(require("./is-document-fragment"));
const parse5_1 = __importDefault(require("parse5"));
const parse_raw_tag_1 = __importDefault(require("./parse-raw-tag"));
const P5_OPTIONS = { sourceCodeLocationInfo: true };
function parse(html) {
var isFragment = is_document_fragment_1.default(html);
var doc = isFragment
const isFragment = is_document_fragment_1.default(html);
const doc = isFragment
? parse5_1.default.parseFragment(html, P5_OPTIONS)
: parse5_1.default.parse(html, P5_OPTIONS);
var nodeTree = traverse(doc, null, html, 0);
// console.dir(nodeTree);
var nodeList = flattenNodes(nodeTree, html);
// console.dir(nodeList);
var parsedDoc = {
nodeList: nodeList,
isFragment: isFragment,
const nodeList = flatten_nodes_1.flattenNodes(traverse(doc, null, html), html);
return {
nodeList,
isFragment,
};
return parsedDoc;
}
exports.default = parse;
function nodeize(p5node, prevNode, parentNode, rawHtml, depth) {
var nextNode = null;
if (!p5node.sourceCodeLocation) {
var prevToken = prevNode || parentNode;
var node = {
/**
 * Converts the children of `rootNode` into a list of sibling nodes.
 *
 * Each child returned by `getChildNodes` is passed through `nodeize`;
 * children for which `nodeize` yields a falsy value are dropped. Consecutive
 * kept nodes are linked to each other through `prevNode` / `nextNode`, except
 * that an end tag is never assigned as the `nextNode` of its predecessor.
 *
 * @param rootNode   Node whose children are converted.
 * @param parentNode Node handed to `nodeize` as the parent (defaults to null).
 * @param rawHtml    Source text handed through to `nodeize`.
 * @returns The list of converted sibling nodes, in child order.
 */
function traverse(rootNode, parentNode = null, rawHtml) {
    const siblings = [];
    let previous = null;
    for (const child of getChildNodes(rootNode)) {
        const current = nodeize(child, previous, parentNode, rawHtml);
        if (current) {
            if (previous) {
                const isEndTag = current.type === ml_ast_1.MLASTNodeType.EndTag;
                if (!isEndTag) {
                    previous.nextNode = current;
                }
                current.prevNode = previous;
            }
            siblings.push(current);
            previous = current;
        }
    }
    return siblings;
}
function nodeize(originNode, prevNode, parentNode, rawHtml) {
const nextNode = null;
if (!originNode.sourceCodeLocation) {
const prevToken = prevNode || parentNode;
const node = {
uuid: uuid_1.default.v4(),

@@ -45,39 +61,39 @@ raw: '',

endCol: prevToken ? prevToken.endCol : 0,
nodeName: p5node.nodeName,
nodeName: originNode.nodeName,
type: ml_ast_1.MLASTNodeType.OmittedTag,
namespace: p5node.namespaceURI,
parentNode: parentNode,
prevNode: prevNode,
nextNode: nextNode,
namespace: originNode.namespaceURI,
parentNode,
prevNode,
nextNode,
isFragment: false,
isGhost: true,
};
node.childNodes = traverse(p5node, node, rawHtml, depth);
node.childNodes = traverse(originNode, node, rawHtml);
return node;
}
var _a = p5node.sourceCodeLocation, startOffset = _a.startOffset, endOffset = _a.endOffset, startLine = _a.startLine, endLine = _a.endLine, startCol = _a.startCol, endCol = _a.endCol;
var raw = rawHtml.slice(startOffset, endOffset || startOffset);
switch (p5node.nodeName) {
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = originNode.sourceCodeLocation;
const raw = rawHtml.slice(startOffset, endOffset || startOffset);
switch (originNode.nodeName) {
case '#documentType': {
return {
uuid: uuid_1.default.v4(),
raw: raw,
raw,
// @ts-ignore
name: p5node.name || '',
name: originNode.name || '',
// @ts-ignore
publicId: p5node.publicId || '',
publicId: originNode.publicId || '',
// @ts-ignore
systemId: p5node.systemId || '',
startOffset: startOffset,
endOffset: endOffset,
startLine: startLine,
endLine: endLine,
startCol: startCol,
endCol: endCol,
systemId: originNode.systemId || '',
startOffset,
endOffset,
startLine,
endLine,
startCol,
endCol,
nodeName: '#doctype',
type: ml_ast_1.MLASTNodeType.Doctype,
parentNode: parentNode,
prevNode: prevNode,
parentNode,
prevNode,
_addPrevNode: 102,
nextNode: nextNode,
nextNode,
isFragment: false,

@@ -88,16 +104,16 @@ isGhost: false,

case '#text': {
var node = {
const node = {
uuid: uuid_1.default.v4(),
raw: raw,
startOffset: startOffset,
endOffset: endOffset,
startLine: startLine,
endLine: endLine,
startCol: startCol,
endCol: endCol,
raw,
startOffset,
endOffset,
startLine,
endLine,
startCol,
endCol,
nodeName: '#text',
type: ml_ast_1.MLASTNodeType.Text,
parentNode: parentNode,
prevNode: prevNode,
nextNode: nextNode,
parentNode,
prevNode,
nextNode,
isFragment: false,

@@ -111,14 +127,14 @@ isGhost: false,

uuid: uuid_1.default.v4(),
raw: raw,
startOffset: startOffset,
endOffset: endOffset,
startLine: startLine,
endLine: endLine,
startCol: startCol,
endCol: endCol,
raw,
startOffset,
endOffset,
startLine,
endLine,
startCol,
endCol,
nodeName: '#comment',
type: ml_ast_1.MLASTNodeType.Comment,
parentNode: parentNode,
prevNode: prevNode,
nextNode: nextNode,
parentNode,
prevNode,
nextNode,
isFragment: false,

@@ -129,14 +145,14 @@ isGhost: false,

default: {
var tagLoc = p5node.sourceCodeLocation.startTag;
var startTagRaw = p5node.sourceCodeLocation.startTag
const tagLoc = originNode.sourceCodeLocation.startTag;
const startTagRaw = originNode.sourceCodeLocation.startTag
? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
: rawHtml.slice(startOffset, endOffset || startOffset);
var tagTokens = parse_raw_tag_1.default(startTagRaw, p5node.sourceCodeLocation.startLine, p5node.sourceCodeLocation.startCol, p5node.sourceCodeLocation.startOffset);
var tagName = tagTokens.tagName;
var endTag = null;
var endTagLoc = p5node.sourceCodeLocation.endTag;
const tagTokens = parse_raw_tag_1.default(startTagRaw, startLine, startCol, startOffset);
const tagName = tagTokens.tagName;
let endTag = null;
const endTagLoc = originNode.sourceCodeLocation.endTag;
if (endTagLoc) {
var endTagRaw = rawHtml.slice(endTagLoc.startOffset, endTagLoc.endOffset);
var endTagTokens = parse_raw_tag_1.default(endTagRaw, endTagLoc.startLine, endTagLoc.startCol, endTagLoc.startOffset);
var endTagName = endTagTokens.tagName;
const endTagRaw = rawHtml.slice(endTagLoc.startOffset, endTagLoc.endOffset);
const endTagTokens = parse_raw_tag_1.default(endTagRaw, endTagLoc.startLine, endTagLoc.startCol, endTagLoc.startOffset);
const endTagName = endTagTokens.tagName;
endTag = {

@@ -153,7 +169,7 @@ uuid: uuid_1.default.v4(),

type: ml_ast_1.MLASTNodeType.EndTag,
namespace: p5node.namespaceURI,
namespace: originNode.namespaceURI,
attributes: endTagTokens.attrs,
parentNode: parentNode,
prevNode: prevNode,
nextNode: nextNode,
parentNode,
prevNode,
nextNode,
pearNode: null,

@@ -164,18 +180,18 @@ isFragment: false,

}
var startTag = {
const startTag = {
uuid: uuid_1.default.v4(),
raw: startTagRaw,
startOffset: startOffset,
startOffset,
endOffset: startOffset + startTagRaw.length,
startLine: startLine,
startLine,
endLine: get_end_line_1.default(startTagRaw, startLine),
startCol: startCol,
startCol,
endCol: get_end_col_1.default(startTagRaw, startCol),
nodeName: tagName,
type: ml_ast_1.MLASTNodeType.StartTag,
namespace: p5node.namespaceURI,
namespace: originNode.namespaceURI,
attributes: tagTokens.attrs,
parentNode: parentNode,
prevNode: prevNode,
nextNode: nextNode,
parentNode,
prevNode,
nextNode,
pearNode: endTag,

@@ -190,3 +206,3 @@ selfClosingSolidus: tagTokens.selfClosingSolidus,

}
startTag.childNodes = traverse(p5node, startTag, rawHtml, depth);
startTag.childNodes = traverse(originNode, startTag, rawHtml);
return startTag;

@@ -196,305 +212,2 @@ }

}
/**
 * Converts the children of `rootNode` into a list of sibling nodes.
 *
 * Each child returned by `getChildNodes` is passed through `nodeize` together
 * with the incremented depth; children for which `nodeize` yields a falsy
 * value are dropped. Consecutive kept nodes are linked through `prevNode` /
 * `nextNode`, except that an end tag is never assigned as the `nextNode` of
 * its predecessor.
 *
 * @param rootNode   Node whose children are converted.
 * @param parentNode Node handed to `nodeize` as the parent; `undefined` is treated as null.
 * @param rawHtml    Source text handed through to `nodeize`.
 * @param depth      Depth of `rootNode`; children are nodeized with `depth + 1`.
 * @returns The list of converted sibling nodes, in child order.
 */
function traverse(rootNode, parentNode, rawHtml, depth) {
    if (parentNode === undefined) {
        parentNode = null;
    }
    var childDepth = depth + 1;
    var collected = [];
    var children = getChildNodes(rootNode);
    var previous = null;
    for (var i = 0; i < children.length; i += 1) {
        var current = nodeize(children[i], previous, parentNode, rawHtml, childDepth);
        if (current) {
            if (previous) {
                if (current.type !== ml_ast_1.MLASTNodeType.EndTag) {
                    previous.nextNode = current;
                }
                current.prevNode = previous;
            }
            previous = current;
            collected.push(current);
        }
    }
    return collected;
}
/**
 * Depth-first walk over a node list.
 *
 * For every node, in order: the walker is called with the node, then the
 * node's `childNodes` (when present and non-empty) are walked recursively,
 * and finally the walker is called with the node's `pearNode` when it has one.
 *
 * The list length is re-read on every step, so entries added to the list
 * while walking are visited as well.
 *
 * @param nodeList List of nodes to visit.
 * @param walker   Callback invoked with each visited node.
 */
function walk(nodeList, walker) {
    var index = 0;
    while (index < nodeList.length) {
        var current = nodeList[index];
        walker(current);
        var hasChildren = current.childNodes && current.childNodes.length;
        if (hasChildren) {
            walk(current.childNodes, walker);
        }
        if (current.pearNode) {
            walker(current.pearNode);
        }
        index += 1;
    }
}
exports.walk = walk;
/**
 * Flattens a node tree into a single ordered list of nodes.
 *
 * Steps, in order:
 *  1. Walk the tree and push every visited node into a flat list, inserting a
 *     synthetic `#text` node wherever the gap between two visited nodes in
 *     `rawHtml` consists only of white space.
 *  2. Trim a text node that directly precedes a `body` end tag to the first
 *     piece returned by the tag splitter.
 *  3. Sort by `startOffset` (comparisons involving ghost nodes return 0).
 *  4. Drop non-ghost nodes whose start/end line and column duplicate those of
 *     an earlier node.
 *  5. Fix up the last non-ghost node, or append a trailing `#text` node for
 *     whatever remains of `rawHtml` after the greatest `endOffset`.
 *  6. Merge adjacent text nodes.
 *  7. Re-link prev/next/parent references around `body` end tags, doctypes,
 *     start/end tag pairs and text children of `html`.
 *
 * Nodes of `nodeTree` are mutated in place.
 *
 * @param nodeTree Root-level list of nodes to flatten.
 * @param rawHtml  The source text the node offsets refer to.
 * @returns The flattened list of nodes.
 */
function flattenNodes(nodeTree, rawHtml) {
var nodeOrders = [];
// Position bookkeeping carried from one visited node to the next.
var prevLine = 1;
var prevCol = 1;
var currentStartOffset = 0;
var currentEndOffset = 0;
/**
* pushing list
*/
walk(nodeTree, function (node) {
currentStartOffset = node.startOffset;
// Width of the source text between the previous node's end and this node's start.
var diff = currentStartOffset - currentEndOffset;
if (diff > 0) {
var html = rawHtml.slice(currentEndOffset, currentStartOffset);
/**
* first white spaces
*/
if (/^\s+$/.test(html)) {
var uuid = uuid_1.default.v4();
var spaces = html;
var textNode = {
uuid: uuid,
raw: spaces,
startOffset: currentEndOffset,
endOffset: currentEndOffset + spaces.length,
startLine: prevLine,
endLine: get_end_line_1.default(spaces, prevLine),
startCol: prevCol,
endCol: get_end_col_1.default(spaces, prevCol),
nodeName: '#text',
type: ml_ast_1.MLASTNodeType.Text,
parentNode: node.parentNode,
prevNode: node.prevNode,
nextNode: node,
isFragment: false,
isGhost: false,
};
node.prevNode = textNode;
// The synthetic text node is inserted at the front of the parent's child list.
if (node.parentNode && node.parentNode.childNodes) {
node.parentNode.childNodes.unshift(textNode);
}
nodeOrders.push(textNode);
}
else if (/^<\/[a-z0-9][a-z0-9:-]*>$/i.test(html)) {
// close tag
}
else {
// never
}
}
currentEndOffset = currentStartOffset + node.raw.length;
prevLine = node.endLine;
prevCol = node.endCol;
// for ghost nodes
node.startOffset = node.startOffset || currentStartOffset;
node.endOffset = node.endOffset || currentEndOffset;
nodeOrders.push(node);
});
{
/**
* Correction prev/next/parent
*/
// prevToken tracks the most recent non-end-tag node (or the very first node).
var prevToken = null;
for (var _i = 0, nodeOrders_1 = nodeOrders; _i < nodeOrders_1.length; _i++) {
var node = nodeOrders_1[_i];
if (!prevToken) {
prevToken = node;
continue;
}
if (node.type !== ml_ast_1.MLASTNodeType.EndTag) {
prevToken = node;
continue;
}
// Only end tags reach this point.
var endTag = node;
if (endTag.nodeName.toLowerCase() === 'body' && prevToken.type === ml_ast_1.MLASTNodeType.Text) {
var prevWreckagesText = prevToken;
if (prevWreckagesText) {
var wreckages = tag_splitter_1.default(prevWreckagesText.raw, prevWreckagesText.startLine, prevWreckagesText.startCol);
if (wreckages.length) {
// console.log('wreckages\n', wreckages);
// Keep only the first split piece as the text node's content and location.
var lastText = wreckages[0];
var raw = lastText.raw;
var startLine = lastText.line;
var startCol = lastText.col;
prevWreckagesText.raw = raw;
prevWreckagesText.endOffset = prevWreckagesText.startOffset + raw.length;
prevWreckagesText.startLine = startLine;
prevWreckagesText.endLine = get_end_line_1.default(raw, startLine);
prevWreckagesText.startCol = startCol;
prevWreckagesText.endCol = get_end_col_1.default(raw, startCol);
}
}
}
}
}
/**
* sorting
*/
nodeOrders.sort(function (a, b) {
// Any comparison involving a ghost node reports "equal".
if (a.isGhost || b.isGhost) {
return 0;
}
return a.startOffset - b.startOffset;
});
{
/**
* remove duplicated node
*/
// Maps a "startLine:startCol:endLine:endCol" key to the last index seen with that key.
var stack_1 = {};
var removeIndexes_1 = [];
nodeOrders.forEach(function (node, i) {
if (node.isGhost) {
return;
}
var id = node.startLine + ":" + node.startCol + ":" + node.endLine + ":" + node.endCol;
if (stack_1[id] != null) {
removeIndexes_1.push(i);
}
stack_1[id] = i;
});
// Remove from the end so earlier indexes stay valid while splicing.
var r = nodeOrders.length;
while (r--) {
if (removeIndexes_1.includes(r)) {
nodeOrders.splice(r, 1);
}
}
}
{
/**
* getting last node
*/
// lastNode ends up as the final non-ghost entry of nodeOrders, if any.
var lastNode = null;
for (var _a = 0, nodeOrders_2 = nodeOrders; _a < nodeOrders_2.length; _a++) {
var node = nodeOrders_2[_a];
if (node.isGhost) {
continue;
}
lastNode = node;
}
if (lastNode) {
if (lastNode.type === ml_ast_1.MLASTNodeType.Text) {
// Correction for Parse5 AST
// prev node: ? -> html
lastNode.prevNode = lastNode.parentNode && lastNode.parentNode.parentNode;
if (lastNode.prevNode) {
lastNode.prevNode.nextNode = lastNode;
}
// parent node: body -> null
lastNode.parentNode = null;
// next node: ? -> null
lastNode.nextNode = null;
}
else {
/**
* create Last spaces
*/
// Greatest endOffset among all collected nodes.
var lastOffset_1 = 0;
nodeOrders.forEach(function (node, i) {
lastOffset_1 = Math.max(node.endOffset, lastOffset_1);
});
// console.log(lastOffset);
var lastTextContent = rawHtml.slice(lastOffset_1);
// console.log(`"${lastTextContent}"`);
if (lastTextContent) {
var uuid = uuid_1.default.v4();
var line = lastNode ? lastNode.endLine : 0;
var col = lastNode ? lastNode.endCol : 0;
var lastTextNode = {
uuid: uuid,
raw: lastTextContent,
startOffset: lastOffset_1,
endOffset: lastOffset_1 + lastTextContent.length,
startLine: line,
endLine: get_end_line_1.default(lastTextContent, line),
startCol: col,
endCol: get_end_col_1.default(lastTextContent, col),
nodeName: '#text',
type: ml_ast_1.MLASTNodeType.Text,
parentNode: null,
prevNode: lastNode,
nextNode: null,
isFragment: false,
isGhost: false,
};
if (lastNode) {
lastNode.nextNode = lastTextNode;
// Keep the paired tag's nextNode in step with lastNode's.
if ((lastNode.type === ml_ast_1.MLASTNodeType.StartTag || lastNode.type === ml_ast_1.MLASTNodeType.EndTag) &&
lastNode.pearNode) {
lastNode.pearNode.nextNode = lastTextNode;
}
}
nodeOrders.push(lastTextNode);
}
}
}
}
/**
* concat text nodes
*/
var result = [];
nodeOrders.forEach(function (node) {
var prevNode = result[result.length - 1] || null;
// A text node directly following another text node is folded into the earlier one.
if (node.type === ml_ast_1.MLASTNodeType.Text && prevNode && prevNode.type === ml_ast_1.MLASTNodeType.Text) {
prevNode.raw = prevNode.raw + node.raw;
prevNode.endOffset = node.endOffset;
prevNode.endLine = node.endLine;
prevNode.endCol = node.endCol;
prevNode.nextNode = node.nextNode;
// Drop the absorbed node from the surviving node's parent child list.
if (prevNode.parentNode && prevNode.parentNode.childNodes) {
prevNode.parentNode.childNodes = prevNode.parentNode.childNodes.filter(function (n) { return n.uuid !== node.uuid; });
}
if (node.nextNode) {
node.nextNode.prevNode = prevNode;
}
return;
}
result.push(node);
});
{
/**
* Correction prev/next/parent
*/
var prevToken = null;
// Loop body as a function; returning "continue" skips the rest of the body for that node.
var _loop_1 = function (node) {
if (!prevToken) {
prevToken = node;
return "continue";
}
// A text node right after a body end tag or a doctype is re-linked to follow that token.
if (((prevToken.type === ml_ast_1.MLASTNodeType.EndTag && prevToken.nodeName.toLowerCase() === 'body') ||
prevToken.type === ml_ast_1.MLASTNodeType.Doctype) &&
node.type === ml_ast_1.MLASTNodeType.Text) {
var nextNode = prevToken.nextNode;
prevToken.nextNode = node;
if (prevToken.type === ml_ast_1.MLASTNodeType.EndTag && prevToken.pearNode) {
prevToken.pearNode.nextNode = node;
}
node.prevNode = prevToken;
node.nextNode = nextNode;
node.parentNode = prevToken.parentNode;
}
// EndTag
if (node.type === ml_ast_1.MLASTNodeType.StartTag && node.pearNode) {
var endTag = node.pearNode;
endTag.pearNode = node;
endTag.prevNode = node.prevNode;
endTag.nextNode = node.nextNode;
}
// Children
if (node.type === ml_ast_1.MLASTNodeType.Text) {
var parent_1 = node.parentNode;
if (parent_1 && parent_1.type === ml_ast_1.MLASTNodeType.StartTag && parent_1.nodeName.toLowerCase() === 'html') {
if (parent_1.childNodes && !parent_1.childNodes.some(function (n) { return n.uuid === node.uuid; })) {
parent_1.childNodes.push(node);
}
}
}
prevToken = node;
};
for (var _b = 0, result_1 = result; _b < result_1.length; _b++) {
var node = result_1[_b];
_loop_1(node);
}
}
// console.log(nodeOrders.map((n, i) => `${i}: ${n.raw.trim()}`));
return result;
}
/**

@@ -508,3 +221,3 @@ * getChildNodes

if (rootNode.nodeName === 'noscript') {
var textNode = rootNode.childNodes[0];
const textNode = rootNode.childNodes[0];
if (!textNode || textNode.nodeName !== '#text') {

@@ -514,12 +227,12 @@ return [];

// @ts-ignore
var html = textNode.value;
const html = textNode.value;
// @ts-ignore
var _a = rootNode.sourceCodeLocation, startOffset = _a.startOffset, startLine = _a.startLine, startCol = _a.startCol;
var breakCount = startLine - 1;
var indentWidth = startCol - 1;
var spaces = ' '.repeat(startOffset - Math.max(breakCount, 0) - Math.max(indentWidth, 0)) +
const { startOffset, startLine, startCol } = rootNode.sourceCodeLocation;
const breakCount = startLine - 1;
const indentWidth = startCol - 1;
const spaces = ' '.repeat(startOffset - Math.max(breakCount, 0) - Math.max(indentWidth, 0)) +
'\n'.repeat(breakCount) +
' '.repeat(indentWidth);
var fragment = parse5_1.default.parseFragment(spaces + "<x-script>" + html + "</x-script>", P5_OPTIONS);
var childNodes = fragment.childNodes[spaces ? 1 : 0].childNodes;
const fragment = parse5_1.default.parseFragment(`${spaces}<x-script>${html}</x-script>`, P5_OPTIONS);
const childNodes = fragment.childNodes[spaces ? 1 : 0].childNodes;
return childNodes;

@@ -526,0 +239,0 @@ }

@@ -6,5 +6,5 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
var const_1 = require("./const");
var get_end_col_1 = __importDefault(require("./get-end-col"));
var get_end_line_1 = __importDefault(require("./get-end-line"));
const const_1 = require("./const");
const get_end_col_1 = __importDefault(require("./get-end-col"));
const get_end_line_1 = __importDefault(require("./get-end-line"));
function tagSplitter(raw, line, col) {

@@ -15,21 +15,20 @@ return withLocation(tagSplitterAsString(raw), line, col);

function tagSplitterAsString(raw) {
var tagMatches = raw.match(const_1.reSplitterTag);
const tagMatches = raw.match(const_1.reSplitterTag);
if (!tagMatches) {
return [raw];
}
var tokens = Array.from(tagMatches);
const tokens = Array.from(tagMatches);
tokens.unshift(); // remove all match
var nodes = [];
var rest = raw;
for (var _i = 0, tokens_1 = tokens; _i < tokens_1.length; _i++) {
var token = tokens_1[_i];
var index = rest.indexOf(token);
var length_1 = token.length;
const nodes = [];
let rest = raw;
for (const token of tokens) {
const index = rest.indexOf(token);
let length = token.length;
if (index > 0) {
var text = rest.slice(0, index);
const text = rest.slice(0, index);
nodes.push(text);
length_1 += text.length;
length += text.length;
}
nodes.push(token);
rest = rest.slice(length_1);
rest = rest.slice(length);
}

@@ -42,5 +41,4 @@ if (rest) {

function withLocation(nodes, line, col) {
var result = [];
for (var _i = 0, nodes_1 = nodes; _i < nodes_1.length; _i++) {
var node = nodes_1[_i];
const result = [];
for (const node of nodes) {
if (node[0] !== '<') {

@@ -50,8 +48,8 @@ result.push({

raw: node,
line: line,
col: col,
line,
col,
});
}
else {
var label = node.slice(1).slice(0, -1);
const label = node.slice(1).slice(0, -1);
if (const_1.reTagName.test(label)) {

@@ -61,4 +59,4 @@ result.push({

raw: node,
line: line,
col: col,
line,
col,
});

@@ -70,4 +68,4 @@ }

raw: node,
line: line,
col: col,
line,
col,
});

@@ -79,4 +77,4 @@ }

raw: node,
line: line,
col: col,
line,
col,
});

@@ -88,4 +86,4 @@ }

raw: node,
line: line,
col: col,
line,
col,
});

@@ -97,4 +95,4 @@ }

raw: node,
line: line,
col: col,
line,
col,
});

@@ -101,0 +99,0 @@ }

@@ -6,5 +6,5 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
var uuid_1 = __importDefault(require("uuid"));
var get_end_col_1 = __importDefault(require("./get-end-col"));
var get_end_line_1 = __importDefault(require("./get-end-line"));
const uuid_1 = __importDefault(require("uuid"));
const get_end_col_1 = __importDefault(require("./get-end-col"));
const get_end_line_1 = __importDefault(require("./get-end-line"));
function default_1(raw, line, col, startOffset) {

@@ -14,3 +14,3 @@ raw = raw || '';

uuid: uuid_1.default.v4(),
raw: raw,
raw,
startLine: line,

@@ -20,3 +20,3 @@ endLine: get_end_line_1.default(raw, line),

endCol: get_end_col_1.default(raw, col),
startOffset: startOffset,
startOffset,
endOffset: startOffset + raw.length,

@@ -23,0 +23,0 @@ };

{
"name": "@markuplint/html-parser",
"version": "1.0.0-alpha.13",
"version": "1.0.0-alpha.14",
"description": "HTML parser for markuplint",

@@ -28,3 +28,3 @@ "repository": "git@github.com:markuplint/markuplint.git",

},
"gitHead": "b3484907abfdd34b53007b5c08fb19c2eb2c1b66"
"gitHead": "a11698c694424cb94937705896ace0c6a6146b2b"
}

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc