@markuplint/html-parser
Advanced tools
Comparing version 1.2.0 to 1.3.0
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const ml_ast_1 = require("@markuplint/ml-ast"); | ||
const parser_utils_1 = require("@markuplint/parser-utils"); | ||
// eslint-disable-next-line no-control-regex | ||
@@ -21,35 +21,35 @@ const reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/; | ||
let offset = startOffset; | ||
const attrToken = ml_ast_1.tokenizer(raw, line, col, offset); | ||
const attrToken = parser_utils_1.tokenizer(raw, line, col, offset); | ||
line = attrToken.startLine; | ||
col = attrToken.startCol; | ||
offset = attrToken.startOffset; | ||
const spacesBeforeName = ml_ast_1.tokenizer(spacesBeforeAttrString, line, col, offset); | ||
const spacesBeforeName = parser_utils_1.tokenizer(spacesBeforeAttrString, line, col, offset); | ||
line = spacesBeforeName.endLine; | ||
col = spacesBeforeName.endCol; | ||
offset = spacesBeforeName.endOffset; | ||
const name = ml_ast_1.tokenizer(nameChars, line, col, offset); | ||
const name = parser_utils_1.tokenizer(nameChars, line, col, offset); | ||
line = name.endLine; | ||
col = name.endCol; | ||
offset = name.endOffset; | ||
const spacesBeforeEqual = ml_ast_1.tokenizer(spacesBeforeEqualChars, line, col, offset); | ||
const spacesBeforeEqual = parser_utils_1.tokenizer(spacesBeforeEqualChars, line, col, offset); | ||
line = spacesBeforeEqual.endLine; | ||
col = spacesBeforeEqual.endCol; | ||
offset = spacesBeforeEqual.endOffset; | ||
const equal = ml_ast_1.tokenizer(equalChars, line, col, offset); | ||
const equal = parser_utils_1.tokenizer(equalChars, line, col, offset); | ||
line = equal.endLine; | ||
col = equal.endCol; | ||
offset = equal.endOffset; | ||
const spacesAfterEqual = ml_ast_1.tokenizer(spacesAfterEqualChars, line, col, offset); | ||
const spacesAfterEqual = parser_utils_1.tokenizer(spacesAfterEqualChars, line, col, offset); | ||
line = spacesAfterEqual.endLine; | ||
col = spacesAfterEqual.endCol; | ||
offset = spacesAfterEqual.endOffset; | ||
const startQuote = ml_ast_1.tokenizer(quoteChars, line, col, offset); | ||
const startQuote = parser_utils_1.tokenizer(quoteChars, line, col, offset); | ||
line = startQuote.endLine; | ||
col = startQuote.endCol; | ||
offset = startQuote.endOffset; | ||
const value = ml_ast_1.tokenizer(valueChars, line, col, offset); | ||
const value = parser_utils_1.tokenizer(valueChars, line, col, offset); | ||
line = value.endLine; | ||
col = value.endCol; | ||
offset = value.endOffset; | ||
const endQuote = ml_ast_1.tokenizer(quoteChars, line, col, offset); | ||
const endQuote = parser_utils_1.tokenizer(quoteChars, line, col, offset); | ||
line = endQuote.endLine; | ||
@@ -60,3 +60,3 @@ col = endQuote.endCol; | ||
type: 'html-attr', | ||
uuid: ml_ast_1.uuid(), | ||
uuid: parser_utils_1.uuid(), | ||
raw: attrToken.raw, | ||
@@ -63,0 +63,0 @@ startOffset: attrToken.startOffset, |
@@ -8,61 +8,7 @@ "use strict"; | ||
const ml_ast_1 = require("@markuplint/ml-ast"); | ||
const parser_utils_1 = require("@markuplint/parser-utils"); | ||
const remove_deprecated_node_1 = require("./remove-deprecated-node"); | ||
const tag_splitter_1 = __importDefault(require("./tag-splitter")); | ||
function flattenNodes(nodeTree, rawHtml) { | ||
const nodeOrders = []; | ||
let prevLine = 1; | ||
let prevCol = 1; | ||
let currentStartOffset = 0; | ||
let currentEndOffset = 0; | ||
/** | ||
* pushing list | ||
*/ | ||
ml_ast_1.walk(nodeTree, node => { | ||
currentStartOffset = node.startOffset; | ||
const diff = currentStartOffset - currentEndOffset; | ||
if (diff > 0) { | ||
const html = rawHtml.slice(currentEndOffset, currentStartOffset); | ||
/** | ||
* first white spaces | ||
*/ | ||
if (/^\s+$/.test(html)) { | ||
const spaces = html; | ||
const textNode = { | ||
uuid: ml_ast_1.uuid(), | ||
raw: spaces, | ||
startOffset: currentEndOffset, | ||
endOffset: currentEndOffset + spaces.length, | ||
startLine: prevLine, | ||
endLine: ml_ast_1.getEndLine(spaces, prevLine), | ||
startCol: prevCol, | ||
endCol: ml_ast_1.getEndCol(spaces, prevCol), | ||
nodeName: '#text', | ||
type: ml_ast_1.MLASTNodeType.Text, | ||
parentNode: node.parentNode, | ||
prevNode: node.prevNode, | ||
nextNode: node, | ||
isFragment: false, | ||
isGhost: false, | ||
}; | ||
node.prevNode = textNode; | ||
if (node.parentNode && node.parentNode.childNodes) { | ||
node.parentNode.childNodes.unshift(textNode); | ||
} | ||
nodeOrders.push(textNode); | ||
} | ||
else if (/^<\/[a-z0-9][a-z0-9:-]*>$/i.test(html)) { | ||
// close tag | ||
} | ||
else { | ||
// never | ||
} | ||
} | ||
currentEndOffset = currentStartOffset + node.raw.length; | ||
prevLine = node.endLine; | ||
prevCol = node.endCol; | ||
// for ghost nodes | ||
node.startOffset = node.startOffset || currentStartOffset; | ||
node.endOffset = node.endOffset || currentEndOffset; | ||
nodeOrders.push(node); | ||
}); | ||
const nodeOrders = arrayize(nodeTree, rawHtml); | ||
{ | ||
@@ -96,5 +42,5 @@ /** | ||
prevWreckagesText.startLine = startLine; | ||
prevWreckagesText.endLine = ml_ast_1.getEndLine(raw, startLine); | ||
prevWreckagesText.endLine = parser_utils_1.getEndLine(raw, startLine); | ||
prevWreckagesText.startCol = startCol; | ||
prevWreckagesText.endCol = ml_ast_1.getEndCol(raw, startCol); | ||
prevWreckagesText.endCol = parser_utils_1.getEndCol(raw, startCol); | ||
} | ||
@@ -145,3 +91,3 @@ } | ||
const lastTextNode = { | ||
uuid: ml_ast_1.uuid(), | ||
uuid: parser_utils_1.uuid(), | ||
raw: lastTextContent, | ||
@@ -151,5 +97,5 @@ startOffset: lastOffset, | ||
startLine: line, | ||
endLine: ml_ast_1.getEndLine(lastTextContent, line), | ||
endLine: parser_utils_1.getEndLine(lastTextContent, line), | ||
startCol: col, | ||
endCol: ml_ast_1.getEndCol(lastTextContent, col), | ||
endCol: parser_utils_1.getEndCol(lastTextContent, col), | ||
nodeName: '#text', | ||
@@ -242,1 +188,60 @@ type: ml_ast_1.MLASTNodeType.Text, | ||
exports.flattenNodes = flattenNodes; | ||
/**
 * Flattens a node tree into a single array, in the order in which
 * `parser_utils_1.walk` visits the nodes.
 *
 * While walking, the raw source between the end of the previously visited
 * node and the start of the current node is inspected. When that gap consists
 * only of whitespace, a `#text` node is synthesized for it, linked in as the
 * current node's `prevNode`, unshifted into the parent's `childNodes`, and
 * emitted just before the current node.
 *
 * @param nodeTree The tree handed to `parser_utils_1.walk`.
 * @param rawHtml The source text the node offsets are sliced from.
 * @returns The visited nodes, with synthesized whitespace text nodes interleaved.
 */
function arrayize(nodeTree, rawHtml) {
    const flattened = [];
    // Position at which the previously visited node ended.
    const cursor = { offset: 0, line: 1, col: 1 };
    const reOnlyWhitespace = /^\s+$/;
    const reCloseTag = /^<\/[a-z0-9][a-z0-9:-]*>$/i;
    parser_utils_1.walk(nodeTree, node => {
        const nodeStart = node.startOffset;
        if (nodeStart - cursor.offset > 0) {
            const gap = rawHtml.slice(cursor.offset, nodeStart);
            if (reOnlyWhitespace.test(gap)) {
                // Whitespace that no node covers: materialize it as a text node.
                const whitespaceNode = {
                    uuid: parser_utils_1.uuid(),
                    raw: gap,
                    startOffset: cursor.offset,
                    endOffset: cursor.offset + gap.length,
                    startLine: cursor.line,
                    endLine: parser_utils_1.getEndLine(gap, cursor.line),
                    startCol: cursor.col,
                    endCol: parser_utils_1.getEndCol(gap, cursor.col),
                    nodeName: '#text',
                    type: ml_ast_1.MLASTNodeType.Text,
                    parentNode: node.parentNode,
                    prevNode: node.prevNode,
                    nextNode: node,
                    isFragment: false,
                    isGhost: false,
                };
                node.prevNode = whitespaceNode;
                const parent = node.parentNode;
                if (parent && parent.childNodes) {
                    parent.childNodes.unshift(whitespaceNode);
                }
                flattened.push(whitespaceNode);
            }
            else if (reCloseTag.test(gap)) {
                // The gap is a close tag: nothing to emit here.
            }
            // Any other gap content is not expected and is ignored.
        }
        // Advance the cursor to the end of the node that was just visited.
        cursor.offset = nodeStart + node.raw.length;
        cursor.line = node.endLine;
        cursor.col = node.endCol;
        // Ghost nodes may carry falsy offsets; fall back to the computed ones.
        node.startOffset = node.startOffset || nodeStart;
        node.endOffset = node.endOffset || cursor.offset;
        flattened.push(node);
    });
    return flattened;
}
@@ -6,5 +6,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const ml_ast_1 = require("@markuplint/ml-ast"); | ||
const const_1 = require("./const"); | ||
const attr_tokenizer_1 = __importDefault(require("./attr-tokenizer")); | ||
const parser_utils_1 = require("@markuplint/parser-utils"); | ||
// eslint-disable-next-line no-control-regex | ||
@@ -47,7 +47,7 @@ const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/; | ||
const endTokens = reEndTokens.exec(raw); | ||
const selfClosingSolidus = ml_ast_1.tokenizer(endTokens && endTokens[1], line, col, offset); | ||
const selfClosingSolidus = parser_utils_1.tokenizer(endTokens && endTokens[1], line, col, offset); | ||
line = selfClosingSolidus.endLine; | ||
col = selfClosingSolidus.endCol; | ||
offset = selfClosingSolidus.endOffset; | ||
const endSpace = ml_ast_1.tokenizer(endTokens && endTokens[2], line, col, offset); | ||
const endSpace = parser_utils_1.tokenizer(endTokens && endTokens[2], line, col, offset); | ||
return { | ||
@@ -54,0 +54,0 @@ tagName, |
@@ -1,2 +0,2 @@ | ||
import { Parse } from '@markuplint/ml-ast'; | ||
import type { Parse } from '@markuplint/ml-ast'; | ||
export declare const parse: Parse; |
229
lib/parse.js
@@ -7,9 +7,6 @@ "use strict"; | ||
exports.parse = void 0; | ||
const ml_ast_1 = require("@markuplint/ml-ast"); | ||
const create_tree_1 = require("./create-tree"); | ||
const flatten_nodes_1 = require("./flatten-nodes"); | ||
const parser_utils_1 = require("@markuplint/parser-utils"); | ||
const is_document_fragment_1 = __importDefault(require("./is-document-fragment")); | ||
const parse5_1 = __importDefault(require("parse5")); | ||
const parse_raw_tag_1 = __importDefault(require("./parse-raw-tag")); | ||
const P5_OPTIONS = { sourceCodeLocationInfo: true }; | ||
const parse = (rawCode, offsetOffset = 0, offsetLine = 0, offsetColumn = 0, isIgnoringFrontMatter) => { | ||
@@ -20,6 +17,4 @@ if (isIgnoringFrontMatter) { | ||
const isFragment = is_document_fragment_1.default(rawCode); | ||
const doc = isFragment | ||
? parse5_1.default.parseFragment(rawCode, P5_OPTIONS) | ||
: parse5_1.default.parse(rawCode, P5_OPTIONS); | ||
const nodeList = flatten_nodes_1.flattenNodes(traverse(doc, null, rawCode, offsetOffset, offsetLine, offsetColumn), rawCode); | ||
const nodeTree = create_tree_1.createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn); | ||
const nodeList = flatten_nodes_1.flattenNodes(nodeTree, rawCode); | ||
return { | ||
@@ -31,219 +26,1 @@ nodeList, | ||
exports.parse = parse; | ||
/**
 * Converts the children of `rootNode` (as returned by `getChildNodes`) into
 * AST nodes via `nodeize` and links them as siblings.
 *
 * Each converted node gets the previously converted node as `prevNode`. The
 * previous node's `nextNode` is set to the new node unless the new node is an
 * end tag.
 *
 * @param rootNode Node whose children are converted.
 * @param parentNode AST node passed to `nodeize` as the parent of every child.
 * @param rawHtml Source text forwarded to `nodeize`.
 * @param offsetOffset Offset shift forwarded to `nodeize`.
 * @param offsetLine Line shift forwarded to `nodeize`.
 * @param offsetColumn Column shift forwarded to `nodeize`.
 * @returns The converted child nodes in document order.
 */
function traverse(rootNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
    const siblings = [];
    let previous = null;
    for (const child of getChildNodes(rootNode)) {
        const current = nodeize(child, previous, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn);
        if (current) {
            if (previous !== null) {
                // An end tag never becomes the `nextNode` of its predecessor.
                const isEndTag = current.type === ml_ast_1.MLASTNodeType.EndTag;
                if (!isEndTag) {
                    previous.nextNode = current;
                }
                current.prevNode = previous;
            }
            previous = current;
            siblings.push(current);
        }
    }
    return siblings;
}
/**
 * Shifts a source location by the offsets of the embedding document.
 *
 * The column offset is applied only to positions on line 1 of the parsed
 * source; start and end are checked independently.
 *
 * @param location Object with startOffset/endOffset/startLine/endLine/startCol/endCol.
 * @param offsetOffset Amount added to both offsets.
 * @param offsetLine Amount added to both line numbers.
 * @param offsetColumn Amount added to a column whose (unshifted) line is 1.
 * @returns A new object holding the six shifted location fields.
 */
function shiftLocation(location, offsetOffset, offsetLine, offsetColumn) {
    const { startOffset, endOffset, startLine, endLine, startCol, endCol } = location;
    return {
        startOffset: startOffset + offsetOffset,
        endOffset: endOffset + offsetOffset,
        startLine: startLine + offsetLine,
        endLine: endLine + offsetLine,
        startCol: startCol + (startLine === 1 ? offsetColumn : 0),
        endCol: endCol + (endLine === 1 ? offsetColumn : 0),
    };
}
/**
 * Converts one parsed node into an AST node.
 *
 * - A node without `sourceCodeLocation` becomes a zero-width ghost node of
 *   type `OmittedTag`, positioned at the end of the previous sibling (or of
 *   the parent when there is no previous sibling).
 * - `#documentType`, `#text` and `#comment` become Doctype, Text and Comment
 *   nodes.
 * - Anything else becomes a StartTag node. When the source location has an
 *   `endTag`, an EndTag node is created too and the two are linked through
 *   `pearNode`.
 *
 * Child nodes of ghost nodes and start tags are converted with `traverse`.
 *
 * @param originNode The parsed node to convert.
 * @param prevNode The previously converted sibling, or null.
 * @param parentNode The AST parent, or null.
 * @param rawHtml Source text that raw strings are sliced from.
 * @param offsetOffset Amount added to every offset.
 * @param offsetLine Amount added to every line number.
 * @param offsetColumn Amount added to columns located on line 1.
 * @returns The created AST node (the start tag for elements).
 */
function nodeize(originNode, prevNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
    const nextNode = null;
    const location = originNode.sourceCodeLocation;
    if (!location) {
        // No location in the source (presumably an element the parser
        // inserted on its own): anchor a zero-width node at the end of the
        // preceding token.
        const prevToken = prevNode || parentNode;
        const anchor = {
            startOffset: prevToken ? prevToken.endOffset : 0,
            endOffset: prevToken ? prevToken.endOffset : 0,
            startLine: prevToken ? prevToken.endLine : 0,
            endLine: prevToken ? prevToken.endLine : 0,
            startCol: prevToken ? prevToken.endCol : 0,
            endCol: prevToken ? prevToken.endCol : 0,
        };
        const node = Object.assign({
            uuid: ml_ast_1.uuid(),
            raw: '',
        }, shiftLocation(anchor, offsetOffset, offsetLine, offsetColumn), {
            nodeName: originNode.nodeName,
            type: ml_ast_1.MLASTNodeType.OmittedTag,
            namespace: originNode.namespaceURI,
            parentNode,
            prevNode,
            nextNode,
            isFragment: false,
            isGhost: true,
        });
        node.childNodes = traverse(originNode, node, rawHtml, offsetOffset, offsetLine, offsetColumn);
        return node;
    }
    const { startOffset, endOffset, startLine, startCol } = location;
    const raw = rawHtml.slice(startOffset, endOffset || startOffset);
    switch (originNode.nodeName) {
        case '#documentType': {
            return Object.assign({
                uuid: ml_ast_1.uuid(),
                raw,
                name: originNode.name || '',
                publicId: originNode.publicId || '',
                systemId: originNode.systemId || '',
            }, shiftLocation(location, offsetOffset, offsetLine, offsetColumn), {
                nodeName: '#doctype',
                type: ml_ast_1.MLASTNodeType.Doctype,
                parentNode,
                prevNode,
                // NOTE(review): purpose of this property is not visible here;
                // kept as-is so the emitted node shape does not change.
                _addPrevNode: 102,
                nextNode,
                isFragment: false,
                isGhost: false,
            });
        }
        case '#text': {
            return Object.assign({
                uuid: ml_ast_1.uuid(),
                raw,
            }, shiftLocation(location, offsetOffset, offsetLine, offsetColumn), {
                nodeName: '#text',
                type: ml_ast_1.MLASTNodeType.Text,
                parentNode,
                prevNode,
                nextNode,
                isFragment: false,
                isGhost: false,
            });
        }
        case '#comment': {
            return Object.assign({
                uuid: ml_ast_1.uuid(),
                raw,
            }, shiftLocation(location, offsetOffset, offsetLine, offsetColumn), {
                nodeName: '#comment',
                type: ml_ast_1.MLASTNodeType.Comment,
                parentNode,
                prevNode,
                nextNode,
                isFragment: false,
                isGhost: false,
            });
        }
        default: {
            // Without a dedicated start-tag location the whole node text is
            // treated as the start tag.
            const startTagLoc = location.startTag;
            const startTagRaw = startTagLoc ? rawHtml.slice(startTagLoc.startOffset, startTagLoc.endOffset) : raw;
            const tagTokens = parse_raw_tag_1.default(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
            let endTag = null;
            const endTagLoc = location.endTag;
            if (endTagLoc) {
                const endTagRaw = rawHtml.slice(endTagLoc.startOffset, endTagLoc.endOffset);
                const endTagTokens = parse_raw_tag_1.default(endTagRaw, endTagLoc.startLine, endTagLoc.startCol, endTagLoc.startOffset, offsetOffset, offsetLine, offsetColumn);
                endTag = Object.assign({
                    uuid: ml_ast_1.uuid(),
                    raw: endTagRaw,
                }, shiftLocation(endTagLoc, offsetOffset, offsetLine, offsetColumn), {
                    nodeName: endTagTokens.tagName,
                    type: ml_ast_1.MLASTNodeType.EndTag,
                    namespace: originNode.namespaceURI,
                    attributes: endTagTokens.attrs,
                    parentNode,
                    prevNode,
                    nextNode,
                    pearNode: null,
                    isFragment: false,
                    isGhost: false,
                    tagOpenChar: '</',
                    tagCloseChar: '>',
                });
            }
            // The start tag ends where its own raw text ends, not where the
            // whole element ends. Its end column receives the column offset
            // only when the tag ends on line 1, matching every other node.
            const startTagLocation = {
                startOffset,
                endOffset: startOffset + startTagRaw.length,
                startLine,
                endLine: ml_ast_1.getEndLine(startTagRaw, startLine),
                startCol,
                endCol: ml_ast_1.getEndCol(startTagRaw, startCol),
            };
            const startTag = Object.assign({
                uuid: ml_ast_1.uuid(),
                raw: startTagRaw,
            }, shiftLocation(startTagLocation, offsetOffset, offsetLine, offsetColumn), {
                nodeName: tagTokens.tagName,
                type: ml_ast_1.MLASTNodeType.StartTag,
                namespace: originNode.namespaceURI,
                attributes: tagTokens.attrs,
                parentNode,
                prevNode,
                nextNode,
                pearNode: endTag,
                selfClosingSolidus: tagTokens.selfClosingSolidus,
                endSpace: tagTokens.endSpace,
                isFragment: false,
                isGhost: false,
                tagOpenChar: '<',
                tagCloseChar: '>',
            });
            if (endTag) {
                endTag.pearNode = startTag;
            }
            startTag.childNodes = traverse(originNode, startTag, rawHtml, offsetOffset, offsetLine, offsetColumn);
            return startTag;
        }
    }
}
/**
 * getChildNodes
 *
 * Returns the nodes that should be treated as the children of `rootNode`.
 *
 * - If the node has a "content" property, the children of that content are used.
 * - If the node is <noscript>, its first child is expected to be a text node.
 *   The value of that text node is re-parsed as a document fragment so the
 *   markup inside can be handled as nodes. The markup is prefixed with padding
 *   built from the <noscript> element's own offset, line and column.
 * - Otherwise the node's own childNodes are returned.
 */
function getChildNodes(rootNode) {
    if (rootNode.nodeName !== 'noscript') {
        return rootNode.content ? rootNode.content.childNodes : rootNode.childNodes;
    }
    const firstChild = rootNode.childNodes[0];
    const hasTextChild = Boolean(firstChild) && firstChild.nodeName === '#text';
    if (!hasTextChild) {
        return [];
    }
    const innerHtml = firstChild.value;
    const { startOffset, startLine, startCol } = rootNode.sourceCodeLocation;
    const lineBreaks = startLine - 1;
    const indent = startCol - 1;
    // Padding is as long as the <noscript> start offset: filler spaces, then
    // the line breaks, then the indentation of the final line.
    const fillerLength = startOffset - Math.max(lineBreaks, 0) - Math.max(indent, 0);
    const padding = ' '.repeat(fillerLength) + '\n'.repeat(lineBreaks) + ' '.repeat(indent);
    const fragment = parse5_1.default.parseFragment(`${padding}<x-script>${innerHtml}</x-script>`, P5_OPTIONS);
    // With non-empty padding the wrapper element is the second child of the fragment.
    const wrapperIndex = padding ? 1 : 0;
    return fragment.childNodes[wrapperIndex].childNodes;
}
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const ml_ast_1 = require("@markuplint/ml-ast"); | ||
const parser_utils_1 = require("@markuplint/parser-utils"); | ||
const const_1 = require("./const"); | ||
@@ -88,6 +88,6 @@ function tagSplitter(raw, line, col) { | ||
} | ||
line = ml_ast_1.getEndLine(node, line); | ||
col = ml_ast_1.getEndCol(node, col); | ||
line = parser_utils_1.getEndLine(node, line); | ||
col = parser_utils_1.getEndCol(node, col); | ||
} | ||
return result; | ||
} |
{ | ||
"name": "@markuplint/html-parser", | ||
"version": "1.2.0", | ||
"version": "1.3.0", | ||
"description": "HTML parser for markuplint", | ||
@@ -20,10 +20,10 @@ "repository": "git@github.com:markuplint/markuplint.git", | ||
"devDependencies": { | ||
"@types/parse5": "^5.0.3" | ||
"@types/parse5": "^6.0.0" | ||
}, | ||
"dependencies": { | ||
"@markuplint/ml-ast": "^1.2.0", | ||
"@markuplint/parser-utils": "^1.1.0", | ||
"@markuplint/ml-ast": "^1.3.0", | ||
"@markuplint/parser-utils": "^1.2.0", | ||
"parse5": "^6.0.1" | ||
}, | ||
"gitHead": "1806f99b6429b73ce498f6ecf641efc2400f47dd" | ||
"gitHead": "15e11fd74042d8b378387644d36cd882962a9ec8" | ||
} |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
27
974
194024
Updated@markuplint/ml-ast@^1.3.0