@markuplint/html-parser
Advanced tools
Comparing version 1.0.0-alpha.13 to 1.0.0-alpha.14
@@ -6,51 +6,51 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var uuid_1 = __importDefault(require("uuid")); | ||
var tokenizer_1 = __importDefault(require("./tokenizer")); | ||
const uuid_1 = __importDefault(require("uuid")); | ||
const tokenizer_1 = __importDefault(require("./tokenizer")); | ||
// eslint-disable-next-line no-control-regex | ||
var reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/; | ||
const reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/; | ||
function attrTokenizer(raw, line, col, startOffset) { | ||
var attrMatchedMap = raw.match(reAttrsInStartTag); | ||
const attrMatchedMap = raw.match(reAttrsInStartTag); | ||
if (!attrMatchedMap) { | ||
throw new SyntaxError('Illegal attribute token'); | ||
} | ||
var spacesBeforeAttrString = attrMatchedMap[1]; | ||
var nameChars = attrMatchedMap[2]; | ||
var spacesBeforeEqualChars = attrMatchedMap[3] || ''; | ||
var equalChars = attrMatchedMap[4] || null; | ||
var spacesAfterEqualChars = attrMatchedMap[5] || ''; | ||
var quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null; | ||
var valueChars = attrMatchedMap[6] || attrMatchedMap[7] || attrMatchedMap[8] || (quoteChars ? '' : null); | ||
var invalid = !!(valueChars && quoteChars === null && /["'=<>`]/.test(valueChars)) || | ||
const spacesBeforeAttrString = attrMatchedMap[1]; | ||
const nameChars = attrMatchedMap[2]; | ||
const spacesBeforeEqualChars = attrMatchedMap[3] || ''; | ||
const equalChars = attrMatchedMap[4] || null; | ||
const spacesAfterEqualChars = attrMatchedMap[5] || ''; | ||
const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null; | ||
const valueChars = attrMatchedMap[6] || attrMatchedMap[7] || attrMatchedMap[8] || (quoteChars ? '' : null); | ||
const invalid = !!(valueChars && quoteChars === null && /["'=<>`]/.test(valueChars)) || | ||
!!(equalChars && quoteChars === null && valueChars === null); | ||
var offset = startOffset; | ||
var attrToken = tokenizer_1.default(raw, line, col, offset); | ||
var spacesBeforeName = tokenizer_1.default(spacesBeforeAttrString, line, col, offset); | ||
let offset = startOffset; | ||
const attrToken = tokenizer_1.default(raw, line, col, offset); | ||
const spacesBeforeName = tokenizer_1.default(spacesBeforeAttrString, line, col, offset); | ||
line = spacesBeforeName.endLine; | ||
col = spacesBeforeName.endCol; | ||
offset = spacesBeforeName.endOffset; | ||
var name = tokenizer_1.default(nameChars, line, col, offset); | ||
const name = tokenizer_1.default(nameChars, line, col, offset); | ||
line = name.endLine; | ||
col = name.endCol; | ||
offset = name.endOffset; | ||
var spacesBeforeEqual = tokenizer_1.default(spacesBeforeEqualChars, line, col, offset); | ||
const spacesBeforeEqual = tokenizer_1.default(spacesBeforeEqualChars, line, col, offset); | ||
line = spacesBeforeEqual.endLine; | ||
col = spacesBeforeEqual.endCol; | ||
offset = spacesBeforeEqual.endOffset; | ||
var equal = tokenizer_1.default(equalChars, line, col, offset); | ||
const equal = tokenizer_1.default(equalChars, line, col, offset); | ||
line = equal.endLine; | ||
col = equal.endCol; | ||
offset = equal.endOffset; | ||
var spacesAfterEqual = tokenizer_1.default(spacesAfterEqualChars, line, col, offset); | ||
const spacesAfterEqual = tokenizer_1.default(spacesAfterEqualChars, line, col, offset); | ||
line = spacesAfterEqual.endLine; | ||
col = spacesAfterEqual.endCol; | ||
offset = spacesAfterEqual.endOffset; | ||
var startQuote = tokenizer_1.default(quoteChars, line, col, offset); | ||
const startQuote = tokenizer_1.default(quoteChars, line, col, offset); | ||
line = startQuote.endLine; | ||
col = startQuote.endCol; | ||
offset = startQuote.endOffset; | ||
var value = tokenizer_1.default(valueChars, line, col, offset); | ||
const value = tokenizer_1.default(valueChars, line, col, offset); | ||
line = value.endLine; | ||
col = value.endCol; | ||
offset = value.endOffset; | ||
var endQuote = tokenizer_1.default(quoteChars, line, col, offset); | ||
const endQuote = tokenizer_1.default(quoteChars, line, col, offset); | ||
line = endQuote.endLine; | ||
@@ -68,10 +68,10 @@ col = endQuote.endCol; | ||
endCol: attrToken.endCol, | ||
spacesBeforeName: spacesBeforeName, | ||
name: name, | ||
spacesBeforeEqual: spacesBeforeEqual, | ||
equal: equal, | ||
spacesAfterEqual: spacesAfterEqual, | ||
startQuote: startQuote, | ||
value: value, | ||
endQuote: endQuote, | ||
spacesBeforeName, | ||
name, | ||
spacesBeforeEqual, | ||
equal, | ||
spacesAfterEqual, | ||
startQuote, | ||
value, | ||
endQuote, | ||
isInvalid: invalid, | ||
@@ -78,0 +78,0 @@ }; |
@@ -21,3 +21,3 @@ "use strict"; | ||
*/ | ||
var rePCENChar = [ | ||
const rePCENChar = [ | ||
'\\-', | ||
@@ -42,3 +42,3 @@ '\\.', | ||
].join('|'); | ||
exports.rePCEN = new RegExp("^[a-z](?:" + rePCENChar + ")*\\-(?:" + rePCENChar + ")*$", 'i'); | ||
exports.rePCEN = new RegExp(`^[a-z](?:${rePCENChar})*\\-(?:${rePCENChar})*$`, 'i'); | ||
exports.reSplitterTag = /<[^>]+>/g; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
function getEndCol(html, col) { | ||
var lines = html.split(/\r?\n/); | ||
var lineCount = lines.length; | ||
var lastLine = lines.pop(); | ||
const lines = html.split(/\r?\n/); | ||
const lineCount = lines.length; | ||
const lastLine = lines.pop(); | ||
return lineCount > 1 ? lastLine.length + 1 : col + html.length; | ||
} | ||
exports.default = getEndCol; |
export { default as isDocumentFragment } from './is-document-fragment'; | ||
export { default as nodeListToDebugMaps } from './node-list-to-debug-maps'; | ||
export { default as parse } from './parse'; | ||
export { flattenNodes } from './flatten-nodes'; |
@@ -9,1 +9,3 @@ "use strict"; | ||
exports.parse = parse_1.default; | ||
var flatten_nodes_1 = require("./flatten-nodes"); | ||
exports.flattenNodes = flatten_nodes_1.flattenNodes; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var const_1 = require("./const"); | ||
const const_1 = require("./const"); | ||
/** | ||
@@ -5,0 +5,0 @@ * valid name of custom element |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
function default_1(nodeList) { | ||
return nodeList.map(function (n) { | ||
return nodeList.map(n => { | ||
if (!n.isGhost) { | ||
return "[" + n.startLine + ":" + n.startCol + "]>[" + n.endLine + ":" + n.endCol + "](" + n.startOffset + "," + n.endOffset + ")" + n.nodeName + ": " + visibleWhiteSpace(n.raw); | ||
return `[${n.startLine}:${n.startCol}]>[${n.endLine}:${n.endCol}](${n.startOffset},${n.endOffset})${n.nodeName}: ${visibleWhiteSpace(n.raw)}`; | ||
} | ||
else { | ||
return "[N/A]>[N/A](N/A)" + n.nodeName + ": " + visibleWhiteSpace(n.raw); | ||
return `[N/A]>[N/A](N/A)${n.nodeName}: ${visibleWhiteSpace(n.raw)}`; | ||
} | ||
@@ -11,0 +11,0 @@ }); |
@@ -6,34 +6,34 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var const_1 = require("./const"); | ||
var attr_tokenizer_1 = __importDefault(require("./attr-tokenizer")); | ||
var tokenizer_1 = __importDefault(require("./tokenizer")); | ||
const const_1 = require("./const"); | ||
const attr_tokenizer_1 = __importDefault(require("./attr-tokenizer")); | ||
const tokenizer_1 = __importDefault(require("./tokenizer")); | ||
// eslint-disable-next-line no-control-regex | ||
var reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/; | ||
var reEndTokens = /(\s*\/)?(\s*)>$/; | ||
const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/; | ||
const reEndTokens = /(\s*\/)?(\s*)>$/; | ||
function parseRawTag(raw, nodeLine, nodeCol, startOffset) { | ||
var line = nodeLine; | ||
var col = nodeCol; | ||
var offset = startOffset; | ||
var matches = raw.match(const_1.reTag); | ||
let line = nodeLine; | ||
let col = nodeCol; | ||
let offset = startOffset; | ||
const matches = raw.match(const_1.reTag); | ||
if (!matches) { | ||
throw new SyntaxError("Invalid tag syntax: " + raw); | ||
throw new SyntaxError(`Invalid tag syntax: ${raw}`); | ||
} | ||
var tagWithAttrs = matches[1]; | ||
const tagWithAttrs = matches[1]; | ||
// eslint-disable-next-line no-control-regex | ||
var tagNameSplited = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/); | ||
var tagName = tagNameSplited[0] || tagNameSplited[1]; | ||
const tagNameSplited = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/); | ||
const tagName = tagNameSplited[0] || tagNameSplited[1]; | ||
if (!tagName || (!const_1.reTagName.test(tagName) && !const_1.rePCEN.test(tagName))) { | ||
throw new SyntaxError("Invalid tag name: \"" + tagName + "\" in <" + tagWithAttrs + ">"); | ||
throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`); | ||
} | ||
var tagStartPos = tagWithAttrs.indexOf(tagName); | ||
var rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length); | ||
const tagStartPos = tagWithAttrs.indexOf(tagName); | ||
let rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length); | ||
// console.log({ raw, tagStartPos, tagName, rawAttrs }); | ||
col += tagName.length + 1 + tagStartPos; | ||
offset += tagName.length + 1 + tagStartPos; | ||
var attrs = []; | ||
const attrs = []; | ||
while (reAttrsInStartTag.test(rawAttrs)) { | ||
var attrMatchedMap = rawAttrs.match(reAttrsInStartTag); | ||
const attrMatchedMap = rawAttrs.match(reAttrsInStartTag); | ||
if (attrMatchedMap && attrMatchedMap[0]) { | ||
var rawAttr = attrMatchedMap[0]; | ||
var attr = attr_tokenizer_1.default(rawAttr, line, col, offset); | ||
const rawAttr = attrMatchedMap[0]; | ||
const attr = attr_tokenizer_1.default(rawAttr, line, col, offset); | ||
line = attr.endLine; | ||
@@ -46,15 +46,15 @@ col = attr.endCol; | ||
} | ||
var endTokens = reEndTokens.exec(raw); | ||
var selfClosingSolidus = tokenizer_1.default(endTokens && endTokens[1], line, col, offset); | ||
const endTokens = reEndTokens.exec(raw); | ||
const selfClosingSolidus = tokenizer_1.default(endTokens && endTokens[1], line, col, offset); | ||
line = selfClosingSolidus.endLine; | ||
col = selfClosingSolidus.endCol; | ||
offset = selfClosingSolidus.endOffset; | ||
var endSpace = tokenizer_1.default(endTokens && endTokens[2], line, col, offset); | ||
const endSpace = tokenizer_1.default(endTokens && endTokens[2], line, col, offset); | ||
return { | ||
tagName: tagName, | ||
attrs: attrs, | ||
selfClosingSolidus: selfClosingSolidus, | ||
endSpace: endSpace, | ||
tagName, | ||
attrs, | ||
selfClosingSolidus, | ||
endSpace, | ||
}; | ||
} | ||
exports.default = parseRawTag; |
@@ -1,4 +0,2 @@ | ||
import { MLASTDocument, MLASTNode } from '@markuplint/ml-ast'; | ||
import { MLASTDocument } from '@markuplint/ml-ast'; | ||
export default function parse(html: string): MLASTDocument; | ||
export declare type Walker = (node: MLASTNode) => void; | ||
export declare function walk(nodeList: MLASTNode[], walker: Walker): void; |
513
lib/parse.js
@@ -6,32 +6,48 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var ml_ast_1 = require("@markuplint/ml-ast"); | ||
var uuid_1 = __importDefault(require("uuid")); | ||
var get_end_col_1 = __importDefault(require("./get-end-col")); | ||
var get_end_line_1 = __importDefault(require("./get-end-line")); | ||
var is_document_fragment_1 = __importDefault(require("./is-document-fragment")); | ||
var parse5_1 = __importDefault(require("parse5")); | ||
var parse_raw_tag_1 = __importDefault(require("./parse-raw-tag")); | ||
var tag_splitter_1 = __importDefault(require("./tag-splitter")); | ||
var P5_OPTIONS = { sourceCodeLocationInfo: true }; | ||
const ml_ast_1 = require("@markuplint/ml-ast"); | ||
const uuid_1 = __importDefault(require("uuid")); | ||
const flatten_nodes_1 = require("./flatten-nodes"); | ||
const get_end_col_1 = __importDefault(require("./get-end-col")); | ||
const get_end_line_1 = __importDefault(require("./get-end-line")); | ||
const is_document_fragment_1 = __importDefault(require("./is-document-fragment")); | ||
const parse5_1 = __importDefault(require("parse5")); | ||
const parse_raw_tag_1 = __importDefault(require("./parse-raw-tag")); | ||
const P5_OPTIONS = { sourceCodeLocationInfo: true }; | ||
function parse(html) { | ||
var isFragment = is_document_fragment_1.default(html); | ||
var doc = isFragment | ||
const isFragment = is_document_fragment_1.default(html); | ||
const doc = isFragment | ||
? parse5_1.default.parseFragment(html, P5_OPTIONS) | ||
: parse5_1.default.parse(html, P5_OPTIONS); | ||
var nodeTree = traverse(doc, null, html, 0); | ||
// console.dir(nodeTree); | ||
var nodeList = flattenNodes(nodeTree, html); | ||
// console.dir(nodeList); | ||
var parsedDoc = { | ||
nodeList: nodeList, | ||
isFragment: isFragment, | ||
const nodeList = flatten_nodes_1.flattenNodes(traverse(doc, null, html), html); | ||
return { | ||
nodeList, | ||
isFragment, | ||
}; | ||
return parsedDoc; | ||
} | ||
exports.default = parse; | ||
function nodeize(p5node, prevNode, parentNode, rawHtml, depth) { | ||
var nextNode = null; | ||
if (!p5node.sourceCodeLocation) { | ||
var prevToken = prevNode || parentNode; | ||
var node = { | ||
function traverse(rootNode, parentNode = null, rawHtml) { | ||
const nodeList = []; | ||
const childNodes = getChildNodes(rootNode); | ||
let prevNode = null; | ||
for (const p5node of childNodes) { | ||
const node = nodeize(p5node, prevNode, parentNode, rawHtml); | ||
if (!node) { | ||
continue; | ||
} | ||
if (prevNode) { | ||
if (node.type !== ml_ast_1.MLASTNodeType.EndTag) { | ||
prevNode.nextNode = node; | ||
} | ||
node.prevNode = prevNode; | ||
} | ||
prevNode = node; | ||
nodeList.push(node); | ||
} | ||
return nodeList; | ||
} | ||
function nodeize(originNode, prevNode, parentNode, rawHtml) { | ||
const nextNode = null; | ||
if (!originNode.sourceCodeLocation) { | ||
const prevToken = prevNode || parentNode; | ||
const node = { | ||
uuid: uuid_1.default.v4(), | ||
@@ -45,39 +61,39 @@ raw: '', | ||
endCol: prevToken ? prevToken.endCol : 0, | ||
nodeName: p5node.nodeName, | ||
nodeName: originNode.nodeName, | ||
type: ml_ast_1.MLASTNodeType.OmittedTag, | ||
namespace: p5node.namespaceURI, | ||
parentNode: parentNode, | ||
prevNode: prevNode, | ||
nextNode: nextNode, | ||
namespace: originNode.namespaceURI, | ||
parentNode, | ||
prevNode, | ||
nextNode, | ||
isFragment: false, | ||
isGhost: true, | ||
}; | ||
node.childNodes = traverse(p5node, node, rawHtml, depth); | ||
node.childNodes = traverse(originNode, node, rawHtml); | ||
return node; | ||
} | ||
var _a = p5node.sourceCodeLocation, startOffset = _a.startOffset, endOffset = _a.endOffset, startLine = _a.startLine, endLine = _a.endLine, startCol = _a.startCol, endCol = _a.endCol; | ||
var raw = rawHtml.slice(startOffset, endOffset || startOffset); | ||
switch (p5node.nodeName) { | ||
const { startOffset, endOffset, startLine, endLine, startCol, endCol } = originNode.sourceCodeLocation; | ||
const raw = rawHtml.slice(startOffset, endOffset || startOffset); | ||
switch (originNode.nodeName) { | ||
case '#documentType': { | ||
return { | ||
uuid: uuid_1.default.v4(), | ||
raw: raw, | ||
raw, | ||
// @ts-ignore | ||
name: p5node.name || '', | ||
name: originNode.name || '', | ||
// @ts-ignore | ||
publicId: p5node.publicId || '', | ||
publicId: originNode.publicId || '', | ||
// @ts-ignore | ||
systemId: p5node.systemId || '', | ||
startOffset: startOffset, | ||
endOffset: endOffset, | ||
startLine: startLine, | ||
endLine: endLine, | ||
startCol: startCol, | ||
endCol: endCol, | ||
systemId: originNode.systemId || '', | ||
startOffset, | ||
endOffset, | ||
startLine, | ||
endLine, | ||
startCol, | ||
endCol, | ||
nodeName: '#doctype', | ||
type: ml_ast_1.MLASTNodeType.Doctype, | ||
parentNode: parentNode, | ||
prevNode: prevNode, | ||
parentNode, | ||
prevNode, | ||
_addPrevNode: 102, | ||
nextNode: nextNode, | ||
nextNode, | ||
isFragment: false, | ||
@@ -88,16 +104,16 @@ isGhost: false, | ||
case '#text': { | ||
var node = { | ||
const node = { | ||
uuid: uuid_1.default.v4(), | ||
raw: raw, | ||
startOffset: startOffset, | ||
endOffset: endOffset, | ||
startLine: startLine, | ||
endLine: endLine, | ||
startCol: startCol, | ||
endCol: endCol, | ||
raw, | ||
startOffset, | ||
endOffset, | ||
startLine, | ||
endLine, | ||
startCol, | ||
endCol, | ||
nodeName: '#text', | ||
type: ml_ast_1.MLASTNodeType.Text, | ||
parentNode: parentNode, | ||
prevNode: prevNode, | ||
nextNode: nextNode, | ||
parentNode, | ||
prevNode, | ||
nextNode, | ||
isFragment: false, | ||
@@ -111,14 +127,14 @@ isGhost: false, | ||
uuid: uuid_1.default.v4(), | ||
raw: raw, | ||
startOffset: startOffset, | ||
endOffset: endOffset, | ||
startLine: startLine, | ||
endLine: endLine, | ||
startCol: startCol, | ||
endCol: endCol, | ||
raw, | ||
startOffset, | ||
endOffset, | ||
startLine, | ||
endLine, | ||
startCol, | ||
endCol, | ||
nodeName: '#comment', | ||
type: ml_ast_1.MLASTNodeType.Comment, | ||
parentNode: parentNode, | ||
prevNode: prevNode, | ||
nextNode: nextNode, | ||
parentNode, | ||
prevNode, | ||
nextNode, | ||
isFragment: false, | ||
@@ -129,14 +145,14 @@ isGhost: false, | ||
default: { | ||
var tagLoc = p5node.sourceCodeLocation.startTag; | ||
var startTagRaw = p5node.sourceCodeLocation.startTag | ||
const tagLoc = originNode.sourceCodeLocation.startTag; | ||
const startTagRaw = originNode.sourceCodeLocation.startTag | ||
? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset) | ||
: rawHtml.slice(startOffset, endOffset || startOffset); | ||
var tagTokens = parse_raw_tag_1.default(startTagRaw, p5node.sourceCodeLocation.startLine, p5node.sourceCodeLocation.startCol, p5node.sourceCodeLocation.startOffset); | ||
var tagName = tagTokens.tagName; | ||
var endTag = null; | ||
var endTagLoc = p5node.sourceCodeLocation.endTag; | ||
const tagTokens = parse_raw_tag_1.default(startTagRaw, startLine, startCol, startOffset); | ||
const tagName = tagTokens.tagName; | ||
let endTag = null; | ||
const endTagLoc = originNode.sourceCodeLocation.endTag; | ||
if (endTagLoc) { | ||
var endTagRaw = rawHtml.slice(endTagLoc.startOffset, endTagLoc.endOffset); | ||
var endTagTokens = parse_raw_tag_1.default(endTagRaw, endTagLoc.startLine, endTagLoc.startCol, endTagLoc.startOffset); | ||
var endTagName = endTagTokens.tagName; | ||
const endTagRaw = rawHtml.slice(endTagLoc.startOffset, endTagLoc.endOffset); | ||
const endTagTokens = parse_raw_tag_1.default(endTagRaw, endTagLoc.startLine, endTagLoc.startCol, endTagLoc.startOffset); | ||
const endTagName = endTagTokens.tagName; | ||
endTag = { | ||
@@ -153,7 +169,7 @@ uuid: uuid_1.default.v4(), | ||
type: ml_ast_1.MLASTNodeType.EndTag, | ||
namespace: p5node.namespaceURI, | ||
namespace: originNode.namespaceURI, | ||
attributes: endTagTokens.attrs, | ||
parentNode: parentNode, | ||
prevNode: prevNode, | ||
nextNode: nextNode, | ||
parentNode, | ||
prevNode, | ||
nextNode, | ||
pearNode: null, | ||
@@ -164,18 +180,18 @@ isFragment: false, | ||
} | ||
var startTag = { | ||
const startTag = { | ||
uuid: uuid_1.default.v4(), | ||
raw: startTagRaw, | ||
startOffset: startOffset, | ||
startOffset, | ||
endOffset: startOffset + startTagRaw.length, | ||
startLine: startLine, | ||
startLine, | ||
endLine: get_end_line_1.default(startTagRaw, startLine), | ||
startCol: startCol, | ||
startCol, | ||
endCol: get_end_col_1.default(startTagRaw, startCol), | ||
nodeName: tagName, | ||
type: ml_ast_1.MLASTNodeType.StartTag, | ||
namespace: p5node.namespaceURI, | ||
namespace: originNode.namespaceURI, | ||
attributes: tagTokens.attrs, | ||
parentNode: parentNode, | ||
prevNode: prevNode, | ||
nextNode: nextNode, | ||
parentNode, | ||
prevNode, | ||
nextNode, | ||
pearNode: endTag, | ||
@@ -190,3 +206,3 @@ selfClosingSolidus: tagTokens.selfClosingSolidus, | ||
} | ||
startTag.childNodes = traverse(p5node, startTag, rawHtml, depth); | ||
startTag.childNodes = traverse(originNode, startTag, rawHtml); | ||
return startTag; | ||
@@ -196,305 +212,2 @@ } | ||
} | ||
function traverse(rootNode, parentNode, rawHtml, depth) { | ||
if (parentNode === void 0) { parentNode = null; } | ||
depth += 1; | ||
var nodeList = []; | ||
var childNodes = getChildNodes(rootNode); | ||
var prevNode = null; | ||
for (var _i = 0, childNodes_1 = childNodes; _i < childNodes_1.length; _i++) { | ||
var p5node = childNodes_1[_i]; | ||
var node = nodeize(p5node, prevNode, parentNode, rawHtml, depth); | ||
if (!node) { | ||
continue; | ||
} | ||
if (prevNode) { | ||
if (node.type !== ml_ast_1.MLASTNodeType.EndTag) { | ||
prevNode.nextNode = node; | ||
} | ||
node.prevNode = prevNode; | ||
} | ||
prevNode = node; | ||
nodeList.push(node); | ||
} | ||
return nodeList; | ||
} | ||
function walk(nodeList, walker) { | ||
for (var _i = 0, nodeList_1 = nodeList; _i < nodeList_1.length; _i++) { | ||
var node = nodeList_1[_i]; | ||
walker(node); | ||
var tag = node; | ||
if (tag.childNodes && tag.childNodes.length) { | ||
walk(tag.childNodes, walker); | ||
} | ||
if (tag.pearNode) { | ||
walker(tag.pearNode); | ||
} | ||
} | ||
} | ||
exports.walk = walk; | ||
function flattenNodes(nodeTree, rawHtml) { | ||
var nodeOrders = []; | ||
var prevLine = 1; | ||
var prevCol = 1; | ||
var currentStartOffset = 0; | ||
var currentEndOffset = 0; | ||
/** | ||
* pushing list | ||
*/ | ||
walk(nodeTree, function (node) { | ||
currentStartOffset = node.startOffset; | ||
var diff = currentStartOffset - currentEndOffset; | ||
if (diff > 0) { | ||
var html = rawHtml.slice(currentEndOffset, currentStartOffset); | ||
/** | ||
* first white spaces | ||
*/ | ||
if (/^\s+$/.test(html)) { | ||
var uuid = uuid_1.default.v4(); | ||
var spaces = html; | ||
var textNode = { | ||
uuid: uuid, | ||
raw: spaces, | ||
startOffset: currentEndOffset, | ||
endOffset: currentEndOffset + spaces.length, | ||
startLine: prevLine, | ||
endLine: get_end_line_1.default(spaces, prevLine), | ||
startCol: prevCol, | ||
endCol: get_end_col_1.default(spaces, prevCol), | ||
nodeName: '#text', | ||
type: ml_ast_1.MLASTNodeType.Text, | ||
parentNode: node.parentNode, | ||
prevNode: node.prevNode, | ||
nextNode: node, | ||
isFragment: false, | ||
isGhost: false, | ||
}; | ||
node.prevNode = textNode; | ||
if (node.parentNode && node.parentNode.childNodes) { | ||
node.parentNode.childNodes.unshift(textNode); | ||
} | ||
nodeOrders.push(textNode); | ||
} | ||
else if (/^<\/[a-z0-9][a-z0-9:-]*>$/i.test(html)) { | ||
// close tag | ||
} | ||
else { | ||
// never | ||
} | ||
} | ||
currentEndOffset = currentStartOffset + node.raw.length; | ||
prevLine = node.endLine; | ||
prevCol = node.endCol; | ||
// for ghost nodes | ||
node.startOffset = node.startOffset || currentStartOffset; | ||
node.endOffset = node.endOffset || currentEndOffset; | ||
nodeOrders.push(node); | ||
}); | ||
{ | ||
/** | ||
* Correction prev/next/parent | ||
*/ | ||
var prevToken = null; | ||
for (var _i = 0, nodeOrders_1 = nodeOrders; _i < nodeOrders_1.length; _i++) { | ||
var node = nodeOrders_1[_i]; | ||
if (!prevToken) { | ||
prevToken = node; | ||
continue; | ||
} | ||
if (node.type !== ml_ast_1.MLASTNodeType.EndTag) { | ||
prevToken = node; | ||
continue; | ||
} | ||
var endTag = node; | ||
if (endTag.nodeName.toLowerCase() === 'body' && prevToken.type === ml_ast_1.MLASTNodeType.Text) { | ||
var prevWreckagesText = prevToken; | ||
if (prevWreckagesText) { | ||
var wreckages = tag_splitter_1.default(prevWreckagesText.raw, prevWreckagesText.startLine, prevWreckagesText.startCol); | ||
if (wreckages.length) { | ||
// console.log('wreckages\n', wreckages); | ||
var lastText = wreckages[0]; | ||
var raw = lastText.raw; | ||
var startLine = lastText.line; | ||
var startCol = lastText.col; | ||
prevWreckagesText.raw = raw; | ||
prevWreckagesText.endOffset = prevWreckagesText.startOffset + raw.length; | ||
prevWreckagesText.startLine = startLine; | ||
prevWreckagesText.endLine = get_end_line_1.default(raw, startLine); | ||
prevWreckagesText.startCol = startCol; | ||
prevWreckagesText.endCol = get_end_col_1.default(raw, startCol); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
/** | ||
* sorting | ||
*/ | ||
nodeOrders.sort(function (a, b) { | ||
if (a.isGhost || b.isGhost) { | ||
return 0; | ||
} | ||
return a.startOffset - b.startOffset; | ||
}); | ||
{ | ||
/** | ||
* remove duplicated node | ||
*/ | ||
var stack_1 = {}; | ||
var removeIndexes_1 = []; | ||
nodeOrders.forEach(function (node, i) { | ||
if (node.isGhost) { | ||
return; | ||
} | ||
var id = node.startLine + ":" + node.startCol + ":" + node.endLine + ":" + node.endCol; | ||
if (stack_1[id] != null) { | ||
removeIndexes_1.push(i); | ||
} | ||
stack_1[id] = i; | ||
}); | ||
var r = nodeOrders.length; | ||
while (r--) { | ||
if (removeIndexes_1.includes(r)) { | ||
nodeOrders.splice(r, 1); | ||
} | ||
} | ||
} | ||
{ | ||
/** | ||
* getting last node | ||
*/ | ||
var lastNode = null; | ||
for (var _a = 0, nodeOrders_2 = nodeOrders; _a < nodeOrders_2.length; _a++) { | ||
var node = nodeOrders_2[_a]; | ||
if (node.isGhost) { | ||
continue; | ||
} | ||
lastNode = node; | ||
} | ||
if (lastNode) { | ||
if (lastNode.type === ml_ast_1.MLASTNodeType.Text) { | ||
// Correction for Parse5 AST | ||
// prev node: ? -> html | ||
lastNode.prevNode = lastNode.parentNode && lastNode.parentNode.parentNode; | ||
if (lastNode.prevNode) { | ||
lastNode.prevNode.nextNode = lastNode; | ||
} | ||
// parent node: body -> null | ||
lastNode.parentNode = null; | ||
// next node: ? -> null | ||
lastNode.nextNode = null; | ||
} | ||
else { | ||
/** | ||
* create Last spaces | ||
*/ | ||
var lastOffset_1 = 0; | ||
nodeOrders.forEach(function (node, i) { | ||
lastOffset_1 = Math.max(node.endOffset, lastOffset_1); | ||
}); | ||
// console.log(lastOffset); | ||
var lastTextContent = rawHtml.slice(lastOffset_1); | ||
// console.log(`"${lastTextContent}"`); | ||
if (lastTextContent) { | ||
var uuid = uuid_1.default.v4(); | ||
var line = lastNode ? lastNode.endLine : 0; | ||
var col = lastNode ? lastNode.endCol : 0; | ||
var lastTextNode = { | ||
uuid: uuid, | ||
raw: lastTextContent, | ||
startOffset: lastOffset_1, | ||
endOffset: lastOffset_1 + lastTextContent.length, | ||
startLine: line, | ||
endLine: get_end_line_1.default(lastTextContent, line), | ||
startCol: col, | ||
endCol: get_end_col_1.default(lastTextContent, col), | ||
nodeName: '#text', | ||
type: ml_ast_1.MLASTNodeType.Text, | ||
parentNode: null, | ||
prevNode: lastNode, | ||
nextNode: null, | ||
isFragment: false, | ||
isGhost: false, | ||
}; | ||
if (lastNode) { | ||
lastNode.nextNode = lastTextNode; | ||
if ((lastNode.type === ml_ast_1.MLASTNodeType.StartTag || lastNode.type === ml_ast_1.MLASTNodeType.EndTag) && | ||
lastNode.pearNode) { | ||
lastNode.pearNode.nextNode = lastTextNode; | ||
} | ||
} | ||
nodeOrders.push(lastTextNode); | ||
} | ||
} | ||
} | ||
} | ||
/** | ||
* concat text nodes | ||
*/ | ||
var result = []; | ||
nodeOrders.forEach(function (node) { | ||
var prevNode = result[result.length - 1] || null; | ||
if (node.type === ml_ast_1.MLASTNodeType.Text && prevNode && prevNode.type === ml_ast_1.MLASTNodeType.Text) { | ||
prevNode.raw = prevNode.raw + node.raw; | ||
prevNode.endOffset = node.endOffset; | ||
prevNode.endLine = node.endLine; | ||
prevNode.endCol = node.endCol; | ||
prevNode.nextNode = node.nextNode; | ||
if (prevNode.parentNode && prevNode.parentNode.childNodes) { | ||
prevNode.parentNode.childNodes = prevNode.parentNode.childNodes.filter(function (n) { return n.uuid !== node.uuid; }); | ||
} | ||
if (node.nextNode) { | ||
node.nextNode.prevNode = prevNode; | ||
} | ||
return; | ||
} | ||
result.push(node); | ||
}); | ||
{ | ||
/** | ||
* Correction prev/next/parent | ||
*/ | ||
var prevToken = null; | ||
var _loop_1 = function (node) { | ||
if (!prevToken) { | ||
prevToken = node; | ||
return "continue"; | ||
} | ||
if (((prevToken.type === ml_ast_1.MLASTNodeType.EndTag && prevToken.nodeName.toLowerCase() === 'body') || | ||
prevToken.type === ml_ast_1.MLASTNodeType.Doctype) && | ||
node.type === ml_ast_1.MLASTNodeType.Text) { | ||
var nextNode = prevToken.nextNode; | ||
prevToken.nextNode = node; | ||
if (prevToken.type === ml_ast_1.MLASTNodeType.EndTag && prevToken.pearNode) { | ||
prevToken.pearNode.nextNode = node; | ||
} | ||
node.prevNode = prevToken; | ||
node.nextNode = nextNode; | ||
node.parentNode = prevToken.parentNode; | ||
} | ||
// EndTag | ||
if (node.type === ml_ast_1.MLASTNodeType.StartTag && node.pearNode) { | ||
var endTag = node.pearNode; | ||
endTag.pearNode = node; | ||
endTag.prevNode = node.prevNode; | ||
endTag.nextNode = node.nextNode; | ||
} | ||
// Children | ||
if (node.type === ml_ast_1.MLASTNodeType.Text) { | ||
var parent_1 = node.parentNode; | ||
if (parent_1 && parent_1.type === ml_ast_1.MLASTNodeType.StartTag && parent_1.nodeName.toLowerCase() === 'html') { | ||
if (parent_1.childNodes && !parent_1.childNodes.some(function (n) { return n.uuid === node.uuid; })) { | ||
parent_1.childNodes.push(node); | ||
} | ||
} | ||
} | ||
prevToken = node; | ||
}; | ||
for (var _b = 0, result_1 = result; _b < result_1.length; _b++) { | ||
var node = result_1[_b]; | ||
_loop_1(node); | ||
} | ||
} | ||
// console.log(nodeOrders.map((n, i) => `${i}: ${n.raw.trim()}`)); | ||
return result; | ||
} | ||
/** | ||
@@ -508,3 +221,3 @@ * getChildNodes | ||
if (rootNode.nodeName === 'noscript') { | ||
var textNode = rootNode.childNodes[0]; | ||
const textNode = rootNode.childNodes[0]; | ||
if (!textNode || textNode.nodeName !== '#text') { | ||
@@ -514,12 +227,12 @@ return []; | ||
// @ts-ignore | ||
var html = textNode.value; | ||
const html = textNode.value; | ||
// @ts-ignore | ||
var _a = rootNode.sourceCodeLocation, startOffset = _a.startOffset, startLine = _a.startLine, startCol = _a.startCol; | ||
var breakCount = startLine - 1; | ||
var indentWidth = startCol - 1; | ||
var spaces = ' '.repeat(startOffset - Math.max(breakCount, 0) - Math.max(indentWidth, 0)) + | ||
const { startOffset, startLine, startCol } = rootNode.sourceCodeLocation; | ||
const breakCount = startLine - 1; | ||
const indentWidth = startCol - 1; | ||
const spaces = ' '.repeat(startOffset - Math.max(breakCount, 0) - Math.max(indentWidth, 0)) + | ||
'\n'.repeat(breakCount) + | ||
' '.repeat(indentWidth); | ||
var fragment = parse5_1.default.parseFragment(spaces + "<x-script>" + html + "</x-script>", P5_OPTIONS); | ||
var childNodes = fragment.childNodes[spaces ? 1 : 0].childNodes; | ||
const fragment = parse5_1.default.parseFragment(`${spaces}<x-script>${html}</x-script>`, P5_OPTIONS); | ||
const childNodes = fragment.childNodes[spaces ? 1 : 0].childNodes; | ||
return childNodes; | ||
@@ -526,0 +239,0 @@ } |
@@ -6,5 +6,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var const_1 = require("./const"); | ||
var get_end_col_1 = __importDefault(require("./get-end-col")); | ||
var get_end_line_1 = __importDefault(require("./get-end-line")); | ||
const const_1 = require("./const"); | ||
const get_end_col_1 = __importDefault(require("./get-end-col")); | ||
const get_end_line_1 = __importDefault(require("./get-end-line")); | ||
function tagSplitter(raw, line, col) { | ||
@@ -15,21 +15,20 @@ return withLocation(tagSplitterAsString(raw), line, col); | ||
function tagSplitterAsString(raw) { | ||
var tagMatches = raw.match(const_1.reSplitterTag); | ||
const tagMatches = raw.match(const_1.reSplitterTag); | ||
if (!tagMatches) { | ||
return [raw]; | ||
} | ||
var tokens = Array.from(tagMatches); | ||
const tokens = Array.from(tagMatches); | ||
tokens.unshift(); // remove all match | ||
var nodes = []; | ||
var rest = raw; | ||
for (var _i = 0, tokens_1 = tokens; _i < tokens_1.length; _i++) { | ||
var token = tokens_1[_i]; | ||
var index = rest.indexOf(token); | ||
var length_1 = token.length; | ||
const nodes = []; | ||
let rest = raw; | ||
for (const token of tokens) { | ||
const index = rest.indexOf(token); | ||
let length = token.length; | ||
if (index > 0) { | ||
var text = rest.slice(0, index); | ||
const text = rest.slice(0, index); | ||
nodes.push(text); | ||
length_1 += text.length; | ||
length += text.length; | ||
} | ||
nodes.push(token); | ||
rest = rest.slice(length_1); | ||
rest = rest.slice(length); | ||
} | ||
@@ -42,5 +41,4 @@ if (rest) { | ||
function withLocation(nodes, line, col) { | ||
var result = []; | ||
for (var _i = 0, nodes_1 = nodes; _i < nodes_1.length; _i++) { | ||
var node = nodes_1[_i]; | ||
const result = []; | ||
for (const node of nodes) { | ||
if (node[0] !== '<') { | ||
@@ -50,8 +48,8 @@ result.push({ | ||
raw: node, | ||
line: line, | ||
col: col, | ||
line, | ||
col, | ||
}); | ||
} | ||
else { | ||
var label = node.slice(1).slice(0, -1); | ||
const label = node.slice(1).slice(0, -1); | ||
if (const_1.reTagName.test(label)) { | ||
@@ -61,4 +59,4 @@ result.push({ | ||
raw: node, | ||
line: line, | ||
col: col, | ||
line, | ||
col, | ||
}); | ||
@@ -70,4 +68,4 @@ } | ||
raw: node, | ||
line: line, | ||
col: col, | ||
line, | ||
col, | ||
}); | ||
@@ -79,4 +77,4 @@ } | ||
raw: node, | ||
line: line, | ||
col: col, | ||
line, | ||
col, | ||
}); | ||
@@ -88,4 +86,4 @@ } | ||
raw: node, | ||
line: line, | ||
col: col, | ||
line, | ||
col, | ||
}); | ||
@@ -97,4 +95,4 @@ } | ||
raw: node, | ||
line: line, | ||
col: col, | ||
line, | ||
col, | ||
}); | ||
@@ -101,0 +99,0 @@ } |
@@ -6,5 +6,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var uuid_1 = __importDefault(require("uuid")); | ||
var get_end_col_1 = __importDefault(require("./get-end-col")); | ||
var get_end_line_1 = __importDefault(require("./get-end-line")); | ||
const uuid_1 = __importDefault(require("uuid")); | ||
const get_end_col_1 = __importDefault(require("./get-end-col")); | ||
const get_end_line_1 = __importDefault(require("./get-end-line")); | ||
function default_1(raw, line, col, startOffset) { | ||
@@ -14,3 +14,3 @@ raw = raw || ''; | ||
uuid: uuid_1.default.v4(), | ||
raw: raw, | ||
raw, | ||
startLine: line, | ||
@@ -20,3 +20,3 @@ endLine: get_end_line_1.default(raw, line), | ||
endCol: get_end_col_1.default(raw, col), | ||
startOffset: startOffset, | ||
startOffset, | ||
endOffset: startOffset + raw.length, | ||
@@ -23,0 +23,0 @@ }; |
{ | ||
"name": "@markuplint/html-parser", | ||
"version": "1.0.0-alpha.13", | ||
"version": "1.0.0-alpha.14", | ||
"description": "HTML parser for markuplint", | ||
@@ -28,3 +28,3 @@ "repository": "git@github.com:markuplint/markuplint.git", | ||
}, | ||
"gitHead": "b3484907abfdd34b53007b5c08fb19c2eb2c1b66" | ||
"gitHead": "a11698c694424cb94937705896ace0c6a6146b2b" | ||
} |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
179321
32
965
1