Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@markuplint/html-parser

Package Overview
Dependencies
Maintainers
1
Versions
170
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@markuplint/html-parser - npm Package Compare versions

Comparing version 1.2.0 to 1.3.0

lib/create-tree.d.ts

22

lib/attr-tokenizer.js
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const ml_ast_1 = require("@markuplint/ml-ast");
const parser_utils_1 = require("@markuplint/parser-utils");
// eslint-disable-next-line no-control-regex

@@ -21,35 +21,35 @@ const reAttrsInStartTag = /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;

let offset = startOffset;
const attrToken = ml_ast_1.tokenizer(raw, line, col, offset);
const attrToken = parser_utils_1.tokenizer(raw, line, col, offset);
line = attrToken.startLine;
col = attrToken.startCol;
offset = attrToken.startOffset;
const spacesBeforeName = ml_ast_1.tokenizer(spacesBeforeAttrString, line, col, offset);
const spacesBeforeName = parser_utils_1.tokenizer(spacesBeforeAttrString, line, col, offset);
line = spacesBeforeName.endLine;
col = spacesBeforeName.endCol;
offset = spacesBeforeName.endOffset;
const name = ml_ast_1.tokenizer(nameChars, line, col, offset);
const name = parser_utils_1.tokenizer(nameChars, line, col, offset);
line = name.endLine;
col = name.endCol;
offset = name.endOffset;
const spacesBeforeEqual = ml_ast_1.tokenizer(spacesBeforeEqualChars, line, col, offset);
const spacesBeforeEqual = parser_utils_1.tokenizer(spacesBeforeEqualChars, line, col, offset);
line = spacesBeforeEqual.endLine;
col = spacesBeforeEqual.endCol;
offset = spacesBeforeEqual.endOffset;
const equal = ml_ast_1.tokenizer(equalChars, line, col, offset);
const equal = parser_utils_1.tokenizer(equalChars, line, col, offset);
line = equal.endLine;
col = equal.endCol;
offset = equal.endOffset;
const spacesAfterEqual = ml_ast_1.tokenizer(spacesAfterEqualChars, line, col, offset);
const spacesAfterEqual = parser_utils_1.tokenizer(spacesAfterEqualChars, line, col, offset);
line = spacesAfterEqual.endLine;
col = spacesAfterEqual.endCol;
offset = spacesAfterEqual.endOffset;
const startQuote = ml_ast_1.tokenizer(quoteChars, line, col, offset);
const startQuote = parser_utils_1.tokenizer(quoteChars, line, col, offset);
line = startQuote.endLine;
col = startQuote.endCol;
offset = startQuote.endOffset;
const value = ml_ast_1.tokenizer(valueChars, line, col, offset);
const value = parser_utils_1.tokenizer(valueChars, line, col, offset);
line = value.endLine;
col = value.endCol;
offset = value.endOffset;
const endQuote = ml_ast_1.tokenizer(quoteChars, line, col, offset);
const endQuote = parser_utils_1.tokenizer(quoteChars, line, col, offset);
line = endQuote.endLine;

@@ -60,3 +60,3 @@ col = endQuote.endCol;

type: 'html-attr',
uuid: ml_ast_1.uuid(),
uuid: parser_utils_1.uuid(),
raw: attrToken.raw,

@@ -63,0 +63,0 @@ startOffset: attrToken.startOffset,

@@ -8,61 +8,7 @@ "use strict";

const ml_ast_1 = require("@markuplint/ml-ast");
const parser_utils_1 = require("@markuplint/parser-utils");
const remove_deprecated_node_1 = require("./remove-deprecated-node");
const tag_splitter_1 = __importDefault(require("./tag-splitter"));
function flattenNodes(nodeTree, rawHtml) {
const nodeOrders = [];
let prevLine = 1;
let prevCol = 1;
let currentStartOffset = 0;
let currentEndOffset = 0;
/**
* pushing list
*/
ml_ast_1.walk(nodeTree, node => {
currentStartOffset = node.startOffset;
const diff = currentStartOffset - currentEndOffset;
if (diff > 0) {
const html = rawHtml.slice(currentEndOffset, currentStartOffset);
/**
* first white spaces
*/
if (/^\s+$/.test(html)) {
const spaces = html;
const textNode = {
uuid: ml_ast_1.uuid(),
raw: spaces,
startOffset: currentEndOffset,
endOffset: currentEndOffset + spaces.length,
startLine: prevLine,
endLine: ml_ast_1.getEndLine(spaces, prevLine),
startCol: prevCol,
endCol: ml_ast_1.getEndCol(spaces, prevCol),
nodeName: '#text',
type: ml_ast_1.MLASTNodeType.Text,
parentNode: node.parentNode,
prevNode: node.prevNode,
nextNode: node,
isFragment: false,
isGhost: false,
};
node.prevNode = textNode;
if (node.parentNode && node.parentNode.childNodes) {
node.parentNode.childNodes.unshift(textNode);
}
nodeOrders.push(textNode);
}
else if (/^<\/[a-z0-9][a-z0-9:-]*>$/i.test(html)) {
// close tag
}
else {
// never
}
}
currentEndOffset = currentStartOffset + node.raw.length;
prevLine = node.endLine;
prevCol = node.endCol;
// for ghost nodes
node.startOffset = node.startOffset || currentStartOffset;
node.endOffset = node.endOffset || currentEndOffset;
nodeOrders.push(node);
});
const nodeOrders = arrayize(nodeTree, rawHtml);
{

@@ -96,5 +42,5 @@ /**

prevWreckagesText.startLine = startLine;
prevWreckagesText.endLine = ml_ast_1.getEndLine(raw, startLine);
prevWreckagesText.endLine = parser_utils_1.getEndLine(raw, startLine);
prevWreckagesText.startCol = startCol;
prevWreckagesText.endCol = ml_ast_1.getEndCol(raw, startCol);
prevWreckagesText.endCol = parser_utils_1.getEndCol(raw, startCol);
}

@@ -145,3 +91,3 @@ }

const lastTextNode = {
uuid: ml_ast_1.uuid(),
uuid: parser_utils_1.uuid(),
raw: lastTextContent,

@@ -151,5 +97,5 @@ startOffset: lastOffset,

startLine: line,
endLine: ml_ast_1.getEndLine(lastTextContent, line),
endLine: parser_utils_1.getEndLine(lastTextContent, line),
startCol: col,
endCol: ml_ast_1.getEndCol(lastTextContent, col),
endCol: parser_utils_1.getEndCol(lastTextContent, col),
nodeName: '#text',

@@ -242,1 +188,60 @@ type: ml_ast_1.MLASTNodeType.Text,

exports.flattenNodes = flattenNodes;
/**
 * Flattens a node tree into one array, in the order `parser_utils_1.walk`
 * visits the nodes, and fills whitespace-only gaps between consecutive nodes
 * with synthesized `#text` nodes.
 *
 * Visited nodes are mutated: `prevNode` is re-pointed at an inserted text
 * node, and falsy `startOffset` / `endOffset` values are back-filled.
 *
 * @param nodeTree Tree handed to `parser_utils_1.walk`.
 * @param rawHtml  Source text the node offsets index into.
 * @returns The flat list of nodes, including any inserted whitespace nodes.
 */
function arrayize(nodeTree, rawHtml) {
    const flat = [];
    // Position where the previously visited node ended.
    const cursor = { line: 1, col: 1, offset: 0 };
    parser_utils_1.walk(nodeTree, current => {
        const begin = current.startOffset;
        if (begin - cursor.offset > 0) {
            // Source text that lies between the previous node and this one.
            const gap = rawHtml.slice(cursor.offset, begin);
            if (/^\s+$/.test(gap)) {
                const filler = {
                    uuid: parser_utils_1.uuid(),
                    raw: gap,
                    startOffset: cursor.offset,
                    endOffset: cursor.offset + gap.length,
                    startLine: cursor.line,
                    endLine: parser_utils_1.getEndLine(gap, cursor.line),
                    startCol: cursor.col,
                    endCol: parser_utils_1.getEndCol(gap, cursor.col),
                    nodeName: '#text',
                    type: ml_ast_1.MLASTNodeType.Text,
                    parentNode: current.parentNode,
                    prevNode: current.prevNode,
                    nextNode: current,
                    isFragment: false,
                    isGhost: false,
                };
                current.prevNode = filler;
                const parent = current.parentNode;
                if (parent && parent.childNodes) {
                    parent.childNodes.unshift(filler);
                }
                flat.push(filler);
            }
            // Any other gap (a lone close tag, or anything else) is left
            // untouched on purpose.
        }
        const finish = begin + current.raw.length;
        cursor.offset = finish;
        cursor.line = current.endLine;
        cursor.col = current.endCol;
        // Ghost nodes may carry no offsets of their own.
        current.startOffset = current.startOffset || begin;
        current.endOffset = current.endOffset || finish;
        flat.push(current);
    });
    return flat;
}

@@ -6,5 +6,5 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
const ml_ast_1 = require("@markuplint/ml-ast");
const const_1 = require("./const");
const attr_tokenizer_1 = __importDefault(require("./attr-tokenizer"));
const parser_utils_1 = require("@markuplint/parser-utils");
// eslint-disable-next-line no-control-regex

@@ -47,7 +47,7 @@ const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;

const endTokens = reEndTokens.exec(raw);
const selfClosingSolidus = ml_ast_1.tokenizer(endTokens && endTokens[1], line, col, offset);
const selfClosingSolidus = parser_utils_1.tokenizer(endTokens && endTokens[1], line, col, offset);
line = selfClosingSolidus.endLine;
col = selfClosingSolidus.endCol;
offset = selfClosingSolidus.endOffset;
const endSpace = ml_ast_1.tokenizer(endTokens && endTokens[2], line, col, offset);
const endSpace = parser_utils_1.tokenizer(endTokens && endTokens[2], line, col, offset);
return {

@@ -54,0 +54,0 @@ tagName,

@@ -1,2 +0,2 @@

import { Parse } from '@markuplint/ml-ast';
import type { Parse } from '@markuplint/ml-ast';
export declare const parse: Parse;

@@ -7,9 +7,6 @@ "use strict";

exports.parse = void 0;
const ml_ast_1 = require("@markuplint/ml-ast");
const create_tree_1 = require("./create-tree");
const flatten_nodes_1 = require("./flatten-nodes");
const parser_utils_1 = require("@markuplint/parser-utils");
const is_document_fragment_1 = __importDefault(require("./is-document-fragment"));
const parse5_1 = __importDefault(require("parse5"));
const parse_raw_tag_1 = __importDefault(require("./parse-raw-tag"));
const P5_OPTIONS = { sourceCodeLocationInfo: true };
const parse = (rawCode, offsetOffset = 0, offsetLine = 0, offsetColumn = 0, isIgnoringFrontMatter) => {

@@ -20,6 +17,4 @@ if (isIgnoringFrontMatter) {

const isFragment = is_document_fragment_1.default(rawCode);
const doc = isFragment
? parse5_1.default.parseFragment(rawCode, P5_OPTIONS)
: parse5_1.default.parse(rawCode, P5_OPTIONS);
const nodeList = flatten_nodes_1.flattenNodes(traverse(doc, null, rawCode, offsetOffset, offsetLine, offsetColumn), rawCode);
const nodeTree = create_tree_1.createTree(rawCode, isFragment, offsetOffset, offsetLine, offsetColumn);
const nodeList = flatten_nodes_1.flattenNodes(nodeTree, rawCode);
return {

@@ -31,219 +26,1 @@ nodeList,

exports.parse = parse;
/**
 * Converts the children of `rootNode` (as returned by `getChildNodes`) with
 * `nodeize` and links the results as siblings.
 *
 * Each converted node gets `prevNode` set to the previously converted
 * sibling. The previous sibling's `nextNode` is set to the new node unless
 * the new node is an end tag. Children for which `nodeize` returns a falsy
 * value are skipped.
 *
 * @param rootNode     Source node whose children are converted.
 * @param parentNode   Converted parent, passed through to `nodeize`.
 * @param rawHtml      Source text, passed through to `nodeize`.
 * @param offsetOffset Passed through to `nodeize`.
 * @param offsetLine   Passed through to `nodeize`.
 * @param offsetColumn Passed through to `nodeize`.
 * @returns The converted children, in source order.
 */
function traverse(rootNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
    const converted = [];
    let previous = null;
    for (const child of getChildNodes(rootNode)) {
        const current = nodeize(child, previous, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn);
        if (current) {
            if (previous) {
                const isEndTag = current.type === ml_ast_1.MLASTNodeType.EndTag;
                if (!isEndTag) {
                    previous.nextNode = current;
                }
                current.prevNode = previous;
            }
            converted.push(current);
            previous = current;
        }
    }
    return converted;
}
/**
 * Converts one parsed source node into an AST node (doctype, text, comment,
 * start tag with an optional paired end tag, or an omitted "ghost" tag).
 *
 * Offsets and lines are shifted by `offsetOffset` / `offsetLine`. Columns are
 * shifted by `offsetColumn` only when the position is on line 1.
 *
 * @param originNode   Source node; `sourceCodeLocation` is read when present.
 * @param prevNode     Previously converted sibling, or null.
 * @param parentNode   Converted parent, or null.
 * @param rawHtml      Source text that the location offsets index into.
 * @param offsetOffset Amount added to every start/end offset.
 * @param offsetLine   Amount added to every start/end line.
 * @param offsetColumn Amount added to columns located on line 1.
 * @returns The converted node. Element and ghost nodes also get `childNodes`
 *          populated through `traverse`.
 */
function nodeize(originNode, prevNode, parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
    const nextNode = null;
    // Single place for the column rule: only positions on the first line are
    // shifted by offsetColumn.
    const shiftCol = (col, line) => col + (line === 1 ? offsetColumn : 0);
    if (!originNode.sourceCodeLocation) {
        // No location info: emit a zero-width ghost node anchored at the end
        // of the previous sibling (or of the parent when there is none).
        // NOTE(review): prevToken is an already-converted node, so its
        // positions may already include the offsets added below; confirm.
        const prevToken = prevNode || parentNode;
        const anchorOffset = prevToken ? prevToken.endOffset : 0;
        const anchorLine = prevToken ? prevToken.endLine : 0;
        const anchorCol = prevToken ? prevToken.endCol : 0;
        const node = {
            uuid: ml_ast_1.uuid(),
            raw: '',
            startOffset: anchorOffset + offsetOffset,
            endOffset: anchorOffset + offsetOffset,
            startLine: anchorLine + offsetLine,
            endLine: anchorLine + offsetLine,
            startCol: shiftCol(anchorCol, anchorLine),
            endCol: shiftCol(anchorCol, anchorLine),
            nodeName: originNode.nodeName,
            type: ml_ast_1.MLASTNodeType.OmittedTag,
            namespace: originNode.namespaceURI,
            parentNode,
            prevNode,
            nextNode,
            isFragment: false,
            isGhost: true,
        };
        node.childNodes = traverse(originNode, node, rawHtml, offsetOffset, offsetLine, offsetColumn);
        return node;
    }
    const { startOffset, endOffset, startLine, endLine, startCol, endCol } = originNode.sourceCodeLocation;
    const raw = rawHtml.slice(startOffset, endOffset || startOffset);
    switch (originNode.nodeName) {
        case '#documentType': {
            return {
                uuid: ml_ast_1.uuid(),
                raw,
                name: originNode.name || '',
                publicId: originNode.publicId || '',
                systemId: originNode.systemId || '',
                startOffset: startOffset + offsetOffset,
                endOffset: endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: endLine + offsetLine,
                startCol: shiftCol(startCol, startLine),
                endCol: shiftCol(endCol, endLine),
                nodeName: '#doctype',
                type: ml_ast_1.MLASTNodeType.Doctype,
                parentNode,
                prevNode,
                // NOTE(review): purpose of this property is not visible here;
                // kept as-is so the returned shape does not change.
                _addPrevNode: 102,
                nextNode,
                isFragment: false,
                isGhost: false,
            };
        }
        case '#text': {
            return {
                uuid: ml_ast_1.uuid(),
                raw,
                startOffset: startOffset + offsetOffset,
                endOffset: endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: endLine + offsetLine,
                startCol: shiftCol(startCol, startLine),
                endCol: shiftCol(endCol, endLine),
                nodeName: '#text',
                type: ml_ast_1.MLASTNodeType.Text,
                parentNode,
                prevNode,
                nextNode,
                isFragment: false,
                isGhost: false,
            };
        }
        case '#comment': {
            return {
                uuid: ml_ast_1.uuid(),
                raw,
                startOffset: startOffset + offsetOffset,
                endOffset: endOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: endLine + offsetLine,
                startCol: shiftCol(startCol, startLine),
                endCol: shiftCol(endCol, endLine),
                nodeName: '#comment',
                type: ml_ast_1.MLASTNodeType.Comment,
                parentNode,
                prevNode,
                nextNode,
                isFragment: false,
                isGhost: false,
            };
        }
        default: {
            // Element: use the start tag's own location when available,
            // otherwise fall back to the whole node's range.
            const startTagLoc = originNode.sourceCodeLocation.startTag;
            const startTagRaw = startTagLoc
                ? rawHtml.slice(startTagLoc.startOffset, startTagLoc.endOffset)
                : rawHtml.slice(startOffset, endOffset || startOffset);
            const tagTokens = parse_raw_tag_1.default(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
            const tagName = tagTokens.tagName;
            let endTag = null;
            const endTagLoc = originNode.sourceCodeLocation.endTag;
            if (endTagLoc) {
                const endTagRaw = rawHtml.slice(endTagLoc.startOffset, endTagLoc.endOffset);
                const endTagTokens = parse_raw_tag_1.default(endTagRaw, endTagLoc.startLine, endTagLoc.startCol, endTagLoc.startOffset, offsetOffset, offsetLine, offsetColumn);
                endTag = {
                    uuid: ml_ast_1.uuid(),
                    raw: endTagRaw,
                    startOffset: endTagLoc.startOffset + offsetOffset,
                    endOffset: endTagLoc.endOffset + offsetOffset,
                    startLine: endTagLoc.startLine + offsetLine,
                    endLine: endTagLoc.endLine + offsetLine,
                    startCol: shiftCol(endTagLoc.startCol, endTagLoc.startLine),
                    endCol: shiftCol(endTagLoc.endCol, endTagLoc.endLine),
                    nodeName: endTagTokens.tagName,
                    type: ml_ast_1.MLASTNodeType.EndTag,
                    namespace: originNode.namespaceURI,
                    attributes: endTagTokens.attrs,
                    parentNode,
                    prevNode,
                    nextNode,
                    pearNode: null,
                    isFragment: false,
                    isGhost: false,
                    tagOpenChar: '</',
                    tagCloseChar: '>',
                };
            }
            // End position of the start tag, derived from its raw text.
            const startTagEndOffset = startOffset + startTagRaw.length;
            const startTagEndLine = ml_ast_1.getEndLine(startTagRaw, startLine);
            const startTagEndCol = ml_ast_1.getEndCol(startTagRaw, startCol);
            const startTag = {
                uuid: ml_ast_1.uuid(),
                raw: startTagRaw,
                startOffset: startOffset + offsetOffset,
                endOffset: startTagEndOffset + offsetOffset,
                startLine: startLine + offsetLine,
                endLine: startTagEndLine + offsetLine,
                startCol: shiftCol(startCol, startLine),
                // Fix: shift only when the tag ends on line 1, like every
                // other column. Previously any single-line tag was shifted,
                // whatever line it was on.
                endCol: shiftCol(startTagEndCol, startTagEndLine),
                nodeName: tagName,
                type: ml_ast_1.MLASTNodeType.StartTag,
                namespace: originNode.namespaceURI,
                attributes: tagTokens.attrs,
                parentNode,
                prevNode,
                nextNode,
                pearNode: endTag,
                selfClosingSolidus: tagTokens.selfClosingSolidus,
                endSpace: tagTokens.endSpace,
                isFragment: false,
                isGhost: false,
                tagOpenChar: '<',
                tagCloseChar: '>',
            };
            if (endTag) {
                // Link the pair in both directions.
                endTag.pearNode = startTag;
            }
            startTag.childNodes = traverse(originNode, startTag, rawHtml, offsetOffset, offsetLine, offsetColumn);
            return startTag;
        }
    }
}
/**
 * Returns the child nodes that should be traversed for `rootNode`.
 *
 * - A node with a `content` property yields `content.childNodes`.
 * - A `<noscript>` whose first child is a text node has that text re-parsed
 *   as a document fragment, wrapped in `<x-script>` and prefixed with filler
 *   spaces and line breaks built from the node's start offset, line and
 *   column. A `<noscript>` without a leading text node yields an empty list.
 * - Any other node yields its own `childNodes`.
 */
function getChildNodes(rootNode) {
    if (rootNode.nodeName !== 'noscript') {
        return rootNode.content ? rootNode.content.childNodes : rootNode.childNodes;
    }
    const firstChild = rootNode.childNodes[0];
    const startsWithText = Boolean(firstChild) && firstChild.nodeName === '#text';
    if (!startsWithText) {
        return [];
    }
    const html = firstChild.value;
    const { startOffset, startLine, startCol } = rootNode.sourceCodeLocation;
    const breakCount = startLine - 1;
    const indentWidth = startCol - 1;
    // Filler: leading spaces, then the line breaks, then the indent.
    const leadingCount = startOffset - Math.max(breakCount, 0) - Math.max(indentWidth, 0);
    const spaces = [' '.repeat(leadingCount), '\n'.repeat(breakCount), ' '.repeat(indentWidth)].join('');
    const fragment = parse5_1.default.parseFragment(`${spaces}<x-script>${html}</x-script>`, P5_OPTIONS);
    // With non-empty filler the wrapper is read from index 1 rather than 0.
    const wrapperIndex = spaces ? 1 : 0;
    return fragment.childNodes[wrapperIndex].childNodes;
}
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const ml_ast_1 = require("@markuplint/ml-ast");
const parser_utils_1 = require("@markuplint/parser-utils");
const const_1 = require("./const");

@@ -88,6 +88,6 @@ function tagSplitter(raw, line, col) {

}
line = ml_ast_1.getEndLine(node, line);
col = ml_ast_1.getEndCol(node, col);
line = parser_utils_1.getEndLine(node, line);
col = parser_utils_1.getEndCol(node, col);
}
return result;
}
{
"name": "@markuplint/html-parser",
"version": "1.2.0",
"version": "1.3.0",
"description": "HTML parser for markuplint",

@@ -20,10 +20,10 @@ "repository": "git@github.com:markuplint/markuplint.git",

"devDependencies": {
"@types/parse5": "^5.0.3"
"@types/parse5": "^6.0.0"
},
"dependencies": {
"@markuplint/ml-ast": "^1.2.0",
"@markuplint/parser-utils": "^1.1.0",
"@markuplint/ml-ast": "^1.3.0",
"@markuplint/parser-utils": "^1.2.0",
"parse5": "^6.0.1"
},
"gitHead": "1806f99b6429b73ce498f6ecf641efc2400f47dd"
"gitHead": "15e11fd74042d8b378387644d36cd882962a9ec8"
}

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc