"use strict";
		Object.defineProperty(exports, "__esModule", { value: true });
		const ml_ast_1 = require("@markuplint/ml-ast");
		const parser_utils_1 = require("@markuplint/parser-utils");
		// eslint-disable-next-line no-control-regex
		@@ -21,35 +21,35 @@ const reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
		let offset = startOffset;
		const attrToken = ml_ast_1.tokenizer(raw, line, col, offset);
		const attrToken = parser_utils_1.tokenizer(raw, line, col, offset);
		line = attrToken.startLine;
		col = attrToken.startCol;
		offset = attrToken.startOffset;
		const spacesBeforeName = ml_ast_1.tokenizer(spacesBeforeAttrString, line, col, offset);
		const spacesBeforeName = parser_utils_1.tokenizer(spacesBeforeAttrString, line, col, offset);
		line = spacesBeforeName.endLine;
		col = spacesBeforeName.endCol;
		offset = spacesBeforeName.endOffset;
		const name = ml_ast_1.tokenizer(nameChars, line, col, offset);
		const name = parser_utils_1.tokenizer(nameChars, line, col, offset);
		line = name.endLine;
		col = name.endCol;
		offset = name.endOffset;
		const spacesBeforeEqual = ml_ast_1.tokenizer(spacesBeforeEqualChars, line, col, offset);
		const spacesBeforeEqual = parser_utils_1.tokenizer(spacesBeforeEqualChars, line, col, offset);
		line = spacesBeforeEqual.endLine;
		col = spacesBeforeEqual.endCol;
		offset = spacesBeforeEqual.endOffset;
		const equal = ml_ast_1.tokenizer(equalChars, line, col, offset);
		const equal = parser_utils_1.tokenizer(equalChars, line, col, offset);
		line = equal.endLine;
		col = equal.endCol;
		offset = equal.endOffset;
		const spacesAfterEqual = ml_ast_1.tokenizer(spacesAfterEqualChars, line, col, offset);
		const spacesAfterEqual = parser_utils_1.tokenizer(spacesAfterEqualChars, line, col, offset);
		line = spacesAfterEqual.endLine;
		col = spacesAfterEqual.endCol;
		offset = spacesAfterEqual.endOffset;
		const startQuote = ml_ast_1.tokenizer(quoteChars, line, col, offset);
		const startQuote = parser_utils_1.tokenizer(quoteChars, line, col, offset);
		line = startQuote.endLine;
		col = startQuote.endCol;
		offset = startQuote.endOffset;
		const value = ml_ast_1.tokenizer(valueChars, line, col, offset);
		const value = parser_utils_1.tokenizer(valueChars, line, col, offset);
		line = value.endLine;
		col = value.endCol;
		offset = value.endOffset;
		const endQuote = ml_ast_1.tokenizer(quoteChars, line, col, offset);
		const endQuote = parser_utils_1.tokenizer(quoteChars, line, col, offset);
		line = endQuote.endLine;
		@@ -60,3 +60,3 @@ col = endQuote.endCol;
		type: 'html-attr',
		uuid: ml_ast_1.uuid(),
		uuid: parser_utils_1.uuid(),
		raw: attrToken.raw,
		@@ -63,0 +63,0 @@ startOffset: attrToken.startOffset,

127

lib/flatten-nodes.js

		@@ -8,61 +8,7 @@ "use strict";
		const ml_ast_1 = require("@markuplint/ml-ast");
		const parser_utils_1 = require("@markuplint/parser-utils");
		const remove_deprecated_node_1 = require("./remove-deprecated-node");
		const tag_splitter_1 = __importDefault(require("./tag-splitter"));
		function flattenNodes(nodeTree, rawHtml) {
		const nodeOrders = [];
		let prevLine = 1;
		let prevCol = 1;
		let currentStartOffset = 0;
		let currentEndOffset = 0;
		/**
		* pushing list
		*/
		ml_ast_1.walk(nodeTree, node => {
		currentStartOffset = node.startOffset;
		const diff = currentStartOffset - currentEndOffset;
		if (diff > 0) {
		const html = rawHtml.slice(currentEndOffset, currentStartOffset);
		/**
		* first white spaces
		*/
		if (/^\s+$/.test(html)) {
		const spaces = html;
		const textNode = {
		uuid: ml_ast_1.uuid(),
		raw: spaces,
		startOffset: currentEndOffset,
		endOffset: currentEndOffset + spaces.length,
		startLine: prevLine,
		endLine: ml_ast_1.getEndLine(spaces, prevLine),
		startCol: prevCol,
		endCol: ml_ast_1.getEndCol(spaces, prevCol),
		nodeName: '#text',
		type: ml_ast_1.MLASTNodeType.Text,
		parentNode: node.parentNode,
		prevNode: node.prevNode,
		nextNode: node,
		isFragment: false,
		isGhost: false,
		};
		node.prevNode = textNode;
		if (node.parentNode && node.parentNode.childNodes) {
		node.parentNode.childNodes.unshift(textNode);
		}
		nodeOrders.push(textNode);
		}
		else if (/^<\/[a-z0-9][a-z0-9:-]*>$/i.test(html)) {
		// close tag
		}
		else {
		// never
		}
		}
		currentEndOffset = currentStartOffset + node.raw.length;
		prevLine = node.endLine;
		prevCol = node.endCol;
		// for ghost nodes
		node.startOffset = node.startOffset \|\| currentStartOffset;
		node.endOffset = node.endOffset \|\| currentEndOffset;
		nodeOrders.push(node);
		});
		const nodeOrders = arrayize(nodeTree, rawHtml);
		{
		@@ -96,5 +42,5 @@ /**
		prevWreckagesText.startLine = startLine;
		prevWreckagesText.endLine = ml_ast_1.getEndLine(raw, startLine);
		prevWreckagesText.endLine = parser_utils_1.getEndLine(raw, startLine);
		prevWreckagesText.startCol = startCol;
		prevWreckagesText.endCol = ml_ast_1.getEndCol(raw, startCol);
		prevWreckagesText.endCol = parser_utils_1.getEndCol(raw, startCol);
		}
		@@ -145,3 +91,3 @@ }
		const lastTextNode = {
		uuid: ml_ast_1.uuid(),
		uuid: parser_utils_1.uuid(),
		raw: lastTextContent,
		@@ -151,5 +97,5 @@ startOffset: lastOffset,
		startLine: line,
		endLine: ml_ast_1.getEndLine(lastTextContent, line),
		endLine: parser_utils_1.getEndLine(lastTextContent, line),
		startCol: col,
		endCol: ml_ast_1.getEndCol(lastTextContent, col),
		endCol: parser_utils_1.getEndCol(lastTextContent, col),
		nodeName: '#text',
		@@ -242,1 +188,60 @@ type: ml_ast_1.MLASTNodeType.Text,
		exports.flattenNodes = flattenNodes;
		/**
		 * Flattens a node tree into a list, in the order in which
		 * `parser_utils_1.walk` hands the nodes to its callback.
		 *
		 * While walking, the stretch of `rawHtml` between the end of the previously
		 * visited node and the start of the current one is inspected. When that gap
		 * consists only of white space, a synthetic `#text` node is created for it,
		 * linked in as the current node's `prevNode`, added to the front of the
		 * parent's `childNodes`, and pushed onto the result ahead of the current node.
		 *
		 * Note that the visited nodes are mutated: `prevNode`, `startOffset` and
		 * `endOffset` may be reassigned.
		 *
		 * @param nodeTree Node tree passed to `parser_utils_1.walk`.
		 * @param rawHtml Source text that the nodes' offsets index into.
		 * @returns The visited nodes together with the synthesized white-space text nodes.
		 */
		function arrayize(nodeTree, rawHtml) {
			const nodeOrders = [];
			let prevLine = 1;
			let prevCol = 1;
			let currentStartOffset = 0;
			let currentEndOffset = 0;
			/**
			 * pushing list
			 */
			parser_utils_1.walk(nodeTree, node => {
				currentStartOffset = node.startOffset;
				// Number of source characters not covered by any node since the previous one.
				const diff = currentStartOffset - currentEndOffset;
				if (diff > 0) {
					const html = rawHtml.slice(currentEndOffset, currentStartOffset);
					/**
					 * first white spaces
					 */
					if (/^\s+$/.test(html)) {
						const spaces = html;
						const textNode = {
							uuid: parser_utils_1.uuid(),
							raw: spaces,
							startOffset: currentEndOffset,
							endOffset: currentEndOffset + spaces.length,
							startLine: prevLine,
							endLine: parser_utils_1.getEndLine(spaces, prevLine),
							startCol: prevCol,
							endCol: parser_utils_1.getEndCol(spaces, prevCol),
							nodeName: '#text',
							type: ml_ast_1.MLASTNodeType.Text,
							parentNode: node.parentNode,
							prevNode: node.prevNode,
							nextNode: node,
							isFragment: false,
							isGhost: false,
						};
						node.prevNode = textNode;
						if (node.parentNode && node.parentNode.childNodes) {
							node.parentNode.childNodes.unshift(textNode);
						}
						nodeOrders.push(textNode);
					}
					else if (/^<\/[a-z0-9][a-z0-9:-]*>$/i.test(html)) {
						// close tag
					}
					else {
						// never
					}
				}
				// Advance the cursor past the current node's raw text.
				currentEndOffset = currentStartOffset + node.raw.length;
				prevLine = node.endLine;
				prevCol = node.endCol;
				// for ghost nodes
				node.startOffset = node.startOffset || currentStartOffset;
				node.endOffset = node.endOffset || currentEndOffset;
				nodeOrders.push(node);
			});
			return nodeOrders;
		}

lib/parse-raw-tag.js

		@@ -6,5 +6,5 @@ "use strict";
		Object.defineProperty(exports, "__esModule", { value: true });
		const ml_ast_1 = require("@markuplint/ml-ast");
		const const_1 = require("./const");
		const attr_tokenizer_1 = __importDefault(require("./attr-tokenizer"));
		const parser_utils_1 = require("@markuplint/parser-utils");
		// eslint-disable-next-line no-control-regex
		@@ -47,7 +47,7 @@ const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
		const endTokens = reEndTokens.exec(raw);
		const selfClosingSolidus = ml_ast_1.tokenizer(endTokens && endTokens[1], line, col, offset);
		const selfClosingSolidus = parser_utils_1.tokenizer(endTokens && endTokens[1], line, col, offset);
		line = selfClosingSolidus.endLine;
		col = selfClosingSolidus.endCol;
		offset = selfClosingSolidus.endOffset;
		const endSpace = ml_ast_1.tokenizer(endTokens && endTokens[2], line, col, offset);
		const endSpace = parser_utils_1.tokenizer(endTokens && endTokens[2], line, col, offset);
		return {
		@@ -54,0 +54,0 @@ tagName,

lib/parse.d.ts

		@@ -1,2 +0,2 @@
		import { Parse } from '@markuplint/ml-ast';
		import type { Parse } from '@markuplint/ml-ast';
		export declare const parse: Parse;

229

lib/parse.js

		@@ -7,9 +7,6 @@ "use strict";
		exports.parse = void 0;
		const ml_ast_1 = require("@markuplint/ml-ast");
		const create_tree_1 = require("./create-tree");
		const flatten_nodes_1 = require("./flatten-nodes");
		const parser_utils_1 = require("@markuplint/parser-utils");
		const is_document_fragment_1 = __importDefault(require("./is-document-fragment"));
		const parse5_1 = __importDefault(require("parse5"));
		const parse_raw_tag_1 = __importDefault(require("./parse-raw-tag"));
		const P5_OPTIONS = { sourceCodeLocationInfo: true };
		const parse = (rawCode, offsetOffset = 0, offsetLine = 0, offsetColumn = 0, isIgnoringFrontMatter) => {
		@@ -20,6 +17,4 @@ if (isIgnoringFrontMatter) {
		const isFragment = is_document_fragment_1.default(rawCode);
		const doc = isFragment
		? parse5_1.default.parseFragment(rawCode, P5_OPTIONS)
		: parse5_1.default.parse(rawCode, P5_OPTIONS);
		const nodeList = flatten_nodes_1.flattenNodes(traverse(doc, null, rawCode, offsetOffset, offsetLine, offsetColumn), rawCode);
		const nodeTree = create_tree_1.createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn);
		const nodeList = flatten_nodes_1.flattenNodes(nodeTree, rawCode);
		return {
		@@ -31,219 +26,1 @@ nodeList,
		exports.parse = parse;
		/**
		 * Converts the children of `rootNode` (as selected by `getChildNodes`) into
		 * a list of sibling nodes built by `nodeize`, wiring up `prevNode`/`nextNode`
		 * between consecutive siblings.
		 *
		 * Children for which `nodeize` yields a falsy value are left out. An end tag
		 * is never recorded as the `nextNode` of the sibling that precedes it.
		 *
		 * @param rootNode Node whose children are converted.
		 * @param parentNode Value forwarded to `nodeize` as the parent of every child.
		 * @param rawHtml Source text forwarded to `nodeize`.
		 * @param offsetOffset Offset shift forwarded to `nodeize`.
		 * @param offsetLine Line shift forwarded to `nodeize`.
		 * @param offsetColumn Column shift forwarded to `nodeize`.
		 * @returns The converted sibling nodes, in child order.
		 */
		function traverse(rootNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
			const siblings = [];
			let previous = null;
			for (const child of getChildNodes(rootNode)) {
				const current = nodeize(child, previous, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn);
				if (current) {
					if (previous) {
						if (current.type !== ml_ast_1.MLASTNodeType.EndTag) {
							previous.nextNode = current;
						}
						current.prevNode = previous;
					}
					previous = current;
					siblings.push(current);
				}
			}
			return siblings;
		}
		/**
		 * Builds one AST node from a node of the parsed document.
		 *
		 * - A node without `sourceCodeLocation` becomes a zero-length "ghost"
		 *   `OmittedTag` positioned at the end of the previous sibling (or of the
		 *   parent when there is no previous sibling).
		 * - `#documentType`, `#text` and `#comment` become Doctype, Text and
		 *   Comment nodes respectively.
		 * - Anything else becomes a `StartTag` node, paired through `pearNode`
		 *   with an `EndTag` node when the source has an end-tag location.
		 *
		 * Positions are shifted by `offsetOffset` and `offsetLine`; `offsetColumn`
		 * is added only to columns that lie on line 1.
		 *
		 * @param originNode Node to convert.
		 * @param prevNode Previously converted sibling, or `null`.
		 * @param parentNode Converted parent node, or `null`.
		 * @param rawHtml Source text that the location offsets index into.
		 * @param offsetOffset Amount added to every offset.
		 * @param offsetLine Amount added to every line number.
		 * @param offsetColumn Amount added to columns on line 1.
		 * @returns The converted node; element and ghost nodes also receive `childNodes`.
		 */
		function nodeize(originNode, prevNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
			const nextNode = null;
			// The column shift only applies to positions on the first line. Every
			// column below goes through this helper so that all of them use the same
			// rule (the start tag's end column used to test `startLine === _endLine`,
			// which shifted single-line tags on any line).
			const shiftCol = (col, line) => col + (line === 1 ? offsetColumn : 0);
			if (!originNode.sourceCodeLocation) {
				// No location information: place a zero-length node where the previous token ends.
				const prevToken = prevNode || parentNode;
				const startOffset = prevToken ? prevToken.endOffset : 0;
				const endOffset = prevToken ? prevToken.endOffset : 0;
				const startLine = prevToken ? prevToken.endLine : 0;
				const endLine = prevToken ? prevToken.endLine : 0;
				const startCol = prevToken ? prevToken.endCol : 0;
				const endCol = prevToken ? prevToken.endCol : 0;
				const node = {
					uuid: ml_ast_1.uuid(),
					raw: '',
					startOffset: startOffset + offsetOffset,
					endOffset: endOffset + offsetOffset,
					startLine: startLine + offsetLine,
					endLine: endLine + offsetLine,
					startCol: shiftCol(startCol, startLine),
					endCol: shiftCol(endCol, endLine),
					nodeName: originNode.nodeName,
					type: ml_ast_1.MLASTNodeType.OmittedTag,
					namespace: originNode.namespaceURI,
					parentNode,
					prevNode,
					nextNode,
					isFragment: false,
					isGhost: true,
				};
				node.childNodes = traverse(originNode, node, rawHtml, offsetOffset, offsetLine, offsetColumn);
				return node;
			}
			const { startOffset, endOffset, startLine, endLine, startCol, endCol } = originNode.sourceCodeLocation;
			const raw = rawHtml.slice(startOffset, endOffset || startOffset);
			switch (originNode.nodeName) {
				case '#documentType': {
					return {
						uuid: ml_ast_1.uuid(),
						raw,
						// @ts-ignore
						name: originNode.name || '',
						// @ts-ignore
						publicId: originNode.publicId || '',
						// @ts-ignore
						systemId: originNode.systemId || '',
						startOffset: startOffset + offsetOffset,
						endOffset: endOffset + offsetOffset,
						startLine: startLine + offsetLine,
						endLine: endLine + offsetLine,
						startCol: shiftCol(startCol, startLine),
						endCol: shiftCol(endCol, endLine),
						nodeName: '#doctype',
						type: ml_ast_1.MLASTNodeType.Doctype,
						parentNode,
						prevNode,
						// NOTE(review): the meaning of this value is not evident from this file.
						_addPrevNode: 102,
						nextNode,
						isFragment: false,
						isGhost: false,
					};
				}
				case '#text': {
					const node = {
						uuid: ml_ast_1.uuid(),
						raw,
						startOffset: startOffset + offsetOffset,
						endOffset: endOffset + offsetOffset,
						startLine: startLine + offsetLine,
						endLine: endLine + offsetLine,
						startCol: shiftCol(startCol, startLine),
						endCol: shiftCol(endCol, endLine),
						nodeName: '#text',
						type: ml_ast_1.MLASTNodeType.Text,
						parentNode,
						prevNode,
						nextNode,
						isFragment: false,
						isGhost: false,
					};
					return node;
				}
				case '#comment': {
					return {
						uuid: ml_ast_1.uuid(),
						raw,
						startOffset: startOffset + offsetOffset,
						endOffset: endOffset + offsetOffset,
						startLine: startLine + offsetLine,
						endLine: endLine + offsetLine,
						startCol: shiftCol(startCol, startLine),
						endCol: shiftCol(endCol, endLine),
						nodeName: '#comment',
						type: ml_ast_1.MLASTNodeType.Comment,
						parentNode,
						prevNode,
						nextNode,
						isFragment: false,
						isGhost: false,
					};
				}
				default: {
					// Element: use the start tag's own location when available,
					// otherwise fall back to the whole node's range.
					const tagLoc = originNode.sourceCodeLocation.startTag;
					const startTagRaw = originNode.sourceCodeLocation.startTag
						? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
						: rawHtml.slice(startOffset, endOffset || startOffset);
					const tagTokens = parse_raw_tag_1.default(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
					const tagName = tagTokens.tagName;
					let endTag = null;
					const endTagLoc = originNode.sourceCodeLocation.endTag;
					if (endTagLoc) {
						// These names deliberately shadow the outer ones: inside this
						// block they describe the end tag only.
						const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
						const endTagRaw = rawHtml.slice(startOffset, endOffset);
						const endTagTokens = parse_raw_tag_1.default(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
						const endTagName = endTagTokens.tagName;
						endTag = {
							uuid: ml_ast_1.uuid(),
							raw: endTagRaw,
							startOffset: startOffset + offsetOffset,
							endOffset: endOffset + offsetOffset,
							startLine: startLine + offsetLine,
							endLine: endLine + offsetLine,
							startCol: shiftCol(startCol, startLine),
							endCol: shiftCol(endCol, endLine),
							nodeName: endTagName,
							type: ml_ast_1.MLASTNodeType.EndTag,
							namespace: originNode.namespaceURI,
							attributes: endTagTokens.attrs,
							parentNode,
							prevNode,
							nextNode,
							pearNode: null,
							isFragment: false,
							isGhost: false,
							tagOpenChar: '</',
							tagCloseChar: '>',
						};
					}
					// End position of the start tag itself, derived from its raw text.
					const _endOffset = startOffset + startTagRaw.length;
					const _endLine = ml_ast_1.getEndLine(startTagRaw, startLine);
					const _endCol = ml_ast_1.getEndCol(startTagRaw, startCol);
					const startTag = {
						uuid: ml_ast_1.uuid(),
						raw: startTagRaw,
						startOffset: startOffset + offsetOffset,
						endOffset: _endOffset + offsetOffset,
						startLine: startLine + offsetLine,
						endLine: _endLine + offsetLine,
						startCol: shiftCol(startCol, startLine),
						endCol: shiftCol(_endCol, _endLine),
						nodeName: tagName,
						type: ml_ast_1.MLASTNodeType.StartTag,
						namespace: originNode.namespaceURI,
						attributes: tagTokens.attrs,
						parentNode,
						prevNode,
						nextNode,
						pearNode: endTag,
						selfClosingSolidus: tagTokens.selfClosingSolidus,
						endSpace: tagTokens.endSpace,
						isFragment: false,
						isGhost: false,
						tagOpenChar: '<',
						tagCloseChar: '>',
					};
					if (endTag) {
						endTag.pearNode = startTag;
					}
					startTag.childNodes = traverse(originNode, startTag, rawHtml, offsetOffset, offsetLine, offsetColumn);
					return startTag;
				}
			}
		}
		/**
		 * getChildNodes
		 *
		 * Selects the list of children that should be traversed for `rootNode`.
		 *
		 * - If the node has a "content" property, the children of that content are used.
		 * - If the node is <noscript>, its only child is a text node. The text is
		 *   re-parsed as a document fragment (as if scripting were disabled), and the
		 *   resulting nodes are returned. A <noscript> whose first child is missing or
		 *   is not a text node yields an empty list.
		 * - Otherwise the node's own `childNodes` are returned.
		 *
		 * @param rootNode Node whose children are requested.
		 * @returns The child nodes to traverse.
		 */
		function getChildNodes(rootNode) {
			if (rootNode.nodeName === 'noscript') {
				const textNode = rootNode.childNodes[0];
				if (!textNode || textNode.nodeName !== '#text') {
					return [];
				}
				// @ts-ignore
				const html = textNode.value;
				// @ts-ignore
				const { startOffset, startLine, startCol } = rootNode.sourceCodeLocation;
				const breakCount = startLine - 1;
				const indentWidth = startCol - 1;
				// Padding of exactly `startOffset` characters that ends with
				// `breakCount` line breaks and `indentWidth` spaces, so the wrapper tag
				// below starts at the same offset, line and column as the <noscript>.
				const spaces = ' '.repeat(startOffset - Math.max(breakCount, 0) - Math.max(indentWidth, 0)) +
					'\n'.repeat(breakCount) +
					' '.repeat(indentWidth);
				// `<x-script>` has the same length as `<noscript>`, which keeps the
				// positions of the inner content aligned with the original source.
				const fragment = parse5_1.default.parseFragment(`${spaces}<x-script>${html}</x-script>`, P5_OPTIONS);
				// Non-empty padding is parsed as a leading text node, so the wrapper is the second child.
				const childNodes = fragment.childNodes[spaces ? 1 : 0].childNodes;
				return childNodes;
			}
			return rootNode.content ? rootNode.content.childNodes : rootNode.childNodes;
		}

lib/tag-splitter.js

		"use strict";
		Object.defineProperty(exports, "__esModule", { value: true });
		const ml_ast_1 = require("@markuplint/ml-ast");
		const parser_utils_1 = require("@markuplint/parser-utils");
		const const_1 = require("./const");
		@@ -88,6 +88,6 @@ function tagSplitter(raw, line, col) {
		}
		line = ml_ast_1.getEndLine(node, line);
		col = ml_ast_1.getEndCol(node, col);
		line = parser_utils_1.getEndLine(node, line);
		col = parser_utils_1.getEndCol(node, col);
		}
		return result;
		}

package.json

		{
		"name": "@markuplint/html-parser",
		"version": "1.2.0",
		"version": "1.3.0",
		"description": "HTML parser for markuplint",
		@@ -20,10 +20,10 @@ "repository": "git@github.com:markuplint/markuplint.git",
		"devDependencies": {
		"@types/parse5": "^5.0.3"
		"@types/parse5": "^6.0.0"
		},
		"dependencies": {
		"@markuplint/ml-ast": "^1.2.0",
		"@markuplint/parser-utils": "^1.1.0",
		"@markuplint/ml-ast": "^1.3.0",
		"@markuplint/parser-utils": "^1.2.0",
		"parse5": "^6.0.1"
		},
		"gitHead": "1806f99b6429b73ce498f6ecf641efc2400f47dd"
		"gitHead": "15e11fd74042d8b378387644d36cd882962a9ec8"
		}

tsconfig.tsbuildinfo

Sorry, the diff of this file is not supported yet

@markuplint/html-parser - npm Package Compare versions

Improved metrics

Worsened metrics

Dependency changes