lezer-html
Comparing version 0.13.2 to 0.13.3
@@ -0,1 +1,7 @@
## 0.13.3 (2021-02-17)
### Bug fixes
Optimize the tokenizer by using a context tracker.
## 0.13.2 (2021-01-22)
@@ -2,0 +8,0 @@
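The 0.13.3 entry above refers to a change in the hand-written tokenizers: instead of scanning backwards over the input on every token (`stack.startOf` plus a regexp over re-read text, visible in the removed lines below), the grammar now registers a `ContextTracker` that maintains a stack of open element names as the parser shifts start tags and reduces `Element` nodes, so the tokenizers can simply consult `stack.context`. The sketch below illustrates that pattern in isolation; the term IDs and the `tagName` helper are simplified stand-ins, not the package's real ones (those are in the tokens.js diff further down).

```js
import {ContextTracker, ExternalTokenizer} from "lezer"

// Stand-in term IDs; the real ones are generated into parser.terms.js.
const StartTag = 1, missingCloseTag = 2, Element = 10

// One frame per open element, linked to the enclosing element.
function ElementContext(name, parent) {
  this.name = name
  this.parent = parent
}

export const elementContext = new ContextTracker({
  start: null,
  // Push a frame when a start-tag token is shifted...
  shift(context, term, input, stack) {
    return term == StartTag ? new ElementContext(tagName(input, stack.pos), context) : context
  },
  // ...and pop it again when the surrounding Element node is reduced.
  reduce(context, term) {
    return term == Element && context ? context.parent : context
  },
  hash() { return 0 }
})

// A tokenizer can now ask "which element am I inside?" via stack.context
// instead of re-reading earlier input on every call.
export const eofCloser = new ExternalTokenizer((input, token, stack) => {
  // End of input with an element still open: emit a zero-length close token.
  if (input.get(token.start) < 0 && stack.context) token.accept(missingCloseTag, token.start)
}, {contextual: true})

// Simplified stand-in for the package's cached tagNameAfter helper.
function tagName(input, pos) {
  let name = ""
  for (let ch; (ch = input.get(pos)) > 32 && ch != 47 && ch != 62; pos++) // stop at space, '/', '>'
    name += String.fromCharCode(ch)
  return name.toLowerCase()
}
```

For `stack.context` to be populated, the generated parser has to be built with the tracker, which is what the new `context: elementContext` entries in the parser-configuration hunks below do.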
@@ -1,6 +0,5 @@
import { ExternalTokenizer, Parser, NodeProp } from 'lezer';
import { ContextTracker, ExternalTokenizer, Parser, NodeProp } from 'lezer';
// This file was generated by lezer-generator. You probably shouldn't edit it.
const
StartTag = 1,
const StartTag = 1,
StartCloseTag = 2,
@@ -14,3 +13,2 @@ MismatchedStartCloseTag = 3,
OpenTag = 11,
SelfClosingTag = 20,
RawText = 25,
@@ -65,56 +63,65 @@ Dialect_noMatch = 0;
let cachedName = null, cachedInput = null, cachedPos = 0;
function tagNameAfter(input, pos) {
if (cachedPos == pos && cachedInput == input) return cachedName
let next = input.get(pos);
while (isSpace(next)) next = input.get(++pos);
let start = pos;
while (nameChar(next)) next = input.get(++pos);
// Undefined to signal there's a <? or <!, null for just missing
cachedInput = input; cachedPos = pos;
return cachedName = pos > start ? input.read(start, pos).toLowerCase() : next == question || next == bang ? undefined : null
}
const lessThan = 60, greaterThan = 62, slash = 47, question = 63, bang = 33;
const tagStartExpr = /^<\s*([\.\-\:\w\xa1-\uffff]+)/;
function ElementContext(name, parent) {
this.name = name;
this.parent = parent;
this.hash = parent ? parent.hash : 0;
for (let i = 0; i < name.length; i++) this.hash += (this.hash << 4) + name.charCodeAt(i) + (name.charCodeAt(i) << 8);
}
let elementQuery = [Element], openAt = 0;
const elementContext = new ContextTracker({
start: null,
shift(context, term, input, stack) {
return term == StartTag ? new ElementContext(tagNameAfter(input, stack.pos) || "", context) : context
},
reduce(context, term) {
return term == Element && context ? context.parent : context
},
reuse(context, node, input, stack) {
let type = node.type.id;
return type == StartTag || type == OpenTag
? new ElementContext(tagNameAfter(input, stack.pos - node.length + 1) || "", context) : context
},
// Always returns 0 to avoid interfering with reuse. May not be safe
// but I haven't found a counterexample yet.
hash() { return 0 }
});
function parentElement(input, stack, pos, len) {
openAt = stack.startOf(elementQuery, pos);
if (openAt == null) return null
let match = tagStartExpr.exec(input.read(openAt, openAt + len + 10));
return match ? match[1].toLowerCase() : ""
}
const tagStart = new ExternalTokenizer((input, token, stack) => {
let pos = token.start, first = input.get(pos);
// End of file, just close anything
if (first < 0) {
let contextStart = stack.startOf(elementQuery);
let match = contextStart == null ? null : tagStartExpr.exec(input.read(contextStart, contextStart + 30));
if (match && implicitlyClosed[match[1].toLowerCase()]) token.accept(missingCloseTag, token.start);
}
let pos = token.start, first = input.get(pos), close;
// End of file, close any open tags
if (first < 0 && stack.context) token.accept(missingCloseTag, token.start);
if (first != lessThan) return
pos++;
let close = false, tokEnd = pos;
for (let next; next = input.get(pos);) {
if (next == slash && !close) { close = true; pos++; tokEnd = pos; }
else if (next == question || next == bang) return
else if (isSpace(next)) pos++;
else break
}
let nameStart = pos;
while (nameChar(input.get(pos))) pos++;
if (pos == nameStart) return token.accept(close ? IncompleteCloseTag : StartTag, tokEnd)
if (close = (input.get(pos) == slash)) pos++;
let name = tagNameAfter(input, pos);
if (name === undefined) return
if (!name) return token.accept(close ? IncompleteCloseTag : StartTag, pos)
let name = input.read(nameStart, pos).toLowerCase();
let parent = parentElement(input, stack, stack.pos, name.length);
let parent = stack.context ? stack.context.name : null;
if (close) {
if (name == parent) return token.accept(StartCloseTag, tokEnd)
if (implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, tokEnd)
while (parent != null) {
parent = parentElement(input, stack, openAt - 1, name.length);
if (parent == name) return
}
token.accept(MismatchedStartCloseTag, tokEnd);
if (name == parent) return token.accept(StartCloseTag, pos)
if (parent && implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, pos)
for (let cx = stack.context; cx; cx = cx.parent) if (cx.name == name) return
token.accept(MismatchedStartCloseTag, pos);
} else {
if (parent && closeOnOpen[parent] && closeOnOpen[parent][name])
return token.accept(missingCloseTag, token.start)
token.accept(StartTag, tokEnd);
if (parent && closeOnOpen[parent] && closeOnOpen[parent][name]) token.accept(missingCloseTag, token.start);
else token.accept(StartTag, pos);
}
}, {contextual: true});
});
const tagQuery = [OpenTag, SelfClosingTag];
const selfClosed = new ExternalTokenizer((input, token, stack) => {
@@ -128,8 +135,6 @@ let next = input.get(token.start), end = token.start + 1;
}
let from = stack.startOf(tagQuery);
let match = from == null ? null : tagStartExpr.exec(input.read(from, token.start));
if (match && selfClosers[match[1].toLowerCase()]) token.accept(SelfCloseEndTag, end);
}, {contextual: true});
if (stack.context && selfClosers[stack.context.name]) token.accept(SelfCloseEndTag, end);
});
const commentContent$1 = new ExternalTokenizer((input, token, stack) => {
const commentContent$1 = new ExternalTokenizer((input, token) => {
let pos = token.start, endPos = 0;
@@ -218,2 +223,3 @@ for (;;) {
maxTerm: 44,
context: elementContext,
nodeProps: [
@@ -220,0 +226,0 @@ [NodeProp.closedBy, -2,1,2,"EndTag SelfCloseEndTag",11,"CloseTag"],
{
"name": "lezer-html",
"version": "0.13.2",
"version": "0.13.3",
"description": "lezer-based HTML grammar",
@@ -17,3 +17,3 @@ "main": "dist/index.cjs",
"lezer-javascript": "^0.13.0",
"lezer-generator": "^0.13.0",
"lezer-generator": "^0.13.3",
"mocha": "^8.1.3",
@@ -24,3 +24,3 @@ "rollup": "^2.27.1",
"dependencies": {
"lezer": "^0.13.0"
"lezer": "^0.13.2"
},
@@ -27,0 +27,0 @@ "repository": {
// This file was generated by lezer-generator. You probably shouldn't edit it.
import {Parser} from "lezer"
import {tagStart, selfClosed, commentContent} from "./tokens.js"
import {tagStart, selfClosed, commentContent, elementContext} from "./tokens.js"
import {elementContent} from "./content.js"
@@ -13,2 +13,3 @@ import {NodeProp} from "lezer"
maxTerm: 44,
context: elementContext,
nodeProps: [
@@ -15,0 +16,0 @@ [NodeProp.closedBy, -2,1,2,"EndTag SelfCloseEndTag",11,"CloseTag"],
/* Hand-written tokenizers for HTML. */
import {ExternalTokenizer} from "lezer"
import {ExternalTokenizer, ContextTracker} from "lezer"
import {StartTag, StartCloseTag, MismatchedStartCloseTag, missingCloseTag,
SelfCloseEndTag, IncompleteCloseTag, Element, OpenTag, SelfClosingTag,
SelfCloseEndTag, IncompleteCloseTag, Element, OpenTag,
Dialect_noMatch, commentContent as cmntContent} from "./parser.terms.js"
@@ -52,56 +52,65 @@
let cachedName = null, cachedInput = null, cachedPos = 0
function tagNameAfter(input, pos) {
if (cachedPos == pos && cachedInput == input) return cachedName
let next = input.get(pos)
while (isSpace(next)) next = input.get(++pos)
let start = pos
while (nameChar(next)) next = input.get(++pos)
// Undefined to signal there's a <? or <!, null for just missing
cachedInput = input; cachedPos = pos
return cachedName = pos > start ? input.read(start, pos).toLowerCase() : next == question || next == bang ? undefined : null
}
const lessThan = 60, greaterThan = 62, slash = 47, question = 63, bang = 33
const tagStartExpr = /^<\s*([\.\-\:\w\xa1-\uffff]+)/
function ElementContext(name, parent) {
this.name = name
this.parent = parent
this.hash = parent ? parent.hash : 0
for (let i = 0; i < name.length; i++) this.hash += (this.hash << 4) + name.charCodeAt(i) + (name.charCodeAt(i) << 8)
}
let elementQuery = [Element], openAt = 0
export const elementContext = new ContextTracker({
start: null,
shift(context, term, input, stack) {
return term == StartTag ? new ElementContext(tagNameAfter(input, stack.pos) || "", context) : context
},
reduce(context, term) {
return term == Element && context ? context.parent : context
},
reuse(context, node, input, stack) {
let type = node.type.id
return type == StartTag || type == OpenTag
? new ElementContext(tagNameAfter(input, stack.pos - node.length + 1) || "", context) : context
},
// Always returns 0 to avoid interfering with reuse. May not be safe
// but I haven't found a counterexample yet.
hash() { return 0 }
})
function parentElement(input, stack, pos, len) {
openAt = stack.startOf(elementQuery, pos)
if (openAt == null) return null
let match = tagStartExpr.exec(input.read(openAt, openAt + len + 10))
return match ? match[1].toLowerCase() : ""
}
export const tagStart = new ExternalTokenizer((input, token, stack) => {
let pos = token.start, first = input.get(pos)
// End of file, just close anything
if (first < 0) {
let contextStart = stack.startOf(elementQuery)
let match = contextStart == null ? null : tagStartExpr.exec(input.read(contextStart, contextStart + 30))
if (match && implicitlyClosed[match[1].toLowerCase()]) token.accept(missingCloseTag, token.start)
}
let pos = token.start, first = input.get(pos), close
// End of file, close any open tags
if (first < 0 && stack.context) token.accept(missingCloseTag, token.start)
if (first != lessThan) return
pos++
let close = false, tokEnd = pos
for (let next; next = input.get(pos);) {
if (next == slash && !close) { close = true; pos++; tokEnd = pos }
else if (next == question || next == bang) return
else if (isSpace(next)) pos++
else break
}
let nameStart = pos
while (nameChar(input.get(pos))) pos++
if (pos == nameStart) return token.accept(close ? IncompleteCloseTag : StartTag, tokEnd)
if (close = (input.get(pos) == slash)) pos++
let name = tagNameAfter(input, pos)
if (name === undefined) return
if (!name) return token.accept(close ? IncompleteCloseTag : StartTag, pos)
let name = input.read(nameStart, pos).toLowerCase()
let parent = parentElement(input, stack, stack.pos, name.length)
let parent = stack.context ? stack.context.name : null
if (close) {
if (name == parent) return token.accept(StartCloseTag, tokEnd)
if (implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, tokEnd)
while (parent != null) {
parent = parentElement(input, stack, openAt - 1, name.length)
if (parent == name) return
}
token.accept(MismatchedStartCloseTag, tokEnd)
if (name == parent) return token.accept(StartCloseTag, pos)
if (parent && implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, pos)
for (let cx = stack.context; cx; cx = cx.parent) if (cx.name == name) return
token.accept(MismatchedStartCloseTag, pos)
} else {
if (parent && closeOnOpen[parent] && closeOnOpen[parent][name])
return token.accept(missingCloseTag, token.start)
token.accept(StartTag, tokEnd)
if (parent && closeOnOpen[parent] && closeOnOpen[parent][name]) token.accept(missingCloseTag, token.start)
else token.accept(StartTag, pos)
}
}, {contextual: true})
})
const tagQuery = [OpenTag, SelfClosingTag]
export const selfClosed = new ExternalTokenizer((input, token, stack) => {
@@ -115,8 +124,6 @@ let next = input.get(token.start), end = token.start + 1
}
let from = stack.startOf(tagQuery)
let match = from == null ? null : tagStartExpr.exec(input.read(from, token.start))
if (match && selfClosers[match[1].toLowerCase()]) token.accept(SelfCloseEndTag, end)
}, {contextual: true})
if (stack.context && selfClosers[stack.context.name]) token.accept(SelfCloseEndTag, end)
})
export const commentContent = new ExternalTokenizer((input, token, stack) => {
export const commentContent = new ExternalTokenizer((input, token) => {
let pos = token.start, endPos = 0
@@ -123,0 +130,0 @@ for (;;) {
@@ -39,3 +39,3 @@ # Doesn't parse VB as JS
Element(OpenTag(StartTag,TagName,EndTag),
Script(ExpressionStatement(null)),⚠),⚠))
Script(ExpressionStatement(null)))))
@@ -42,0 +42,0 @@ # Error in JS
@@ -118,3 +118,3 @@ # Regular tag
Document(Element(OpenTag(StartTag,TagName,EndTag),MismatchedCloseTag(StartCloseTag,TagName,EndTag),⚠))
Document(Element(OpenTag(StartTag,TagName,EndTag),MismatchedCloseTag(StartCloseTag,TagName,EndTag)))
@@ -127,3 +127,3 @@ # Unclosed tag
Document(Element(OpenTag(StartTag,TagName,EndTag),⚠))
Document(Element(OpenTag(StartTag,TagName,EndTag)))
@@ -283,4 +283,4 @@ # Ignore pseudo-xml self-closers
foo=bar
>hi<
/body
>hi</
body
>
@@ -287,0 +287,0 @@
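The expectation trees above are produced by parsing the test documents with the exported parser and serializing the resulting tree; the updated expectations drop several ⚠ error nodes, presumably because matching close tags against the tracked element context is more precise than the old backwards scan. A minimal sketch of how such a tree is obtained, assuming the package's exported `parser` object and lezer 0.13's string-accepting `parse` method:

```js
import {parser} from "lezer-html"

// Parse a fragment and print its syntax tree in the same
// Document(Element(OpenTag(StartTag,TagName,EndTag),...)) notation
// used by the expectations above.
let tree = parser.parse("<p>one<p>two")
console.log(tree.toString())
```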