lezer-html - npm Package Compare versions

Comparing version 0.13.2 to 0.13.3

test/test-incremental.js


CHANGELOG.md

@@ -0,1 +1,7 @@

+## 0.13.3 (2021-02-17)
+
+### Bug fixes
+
+Optimize the tokenizer by using a context tracker.
+
## 0.13.2 (2021-01-22)

@@ -2,0 +8,0 @@
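The changelog entry above summarizes the whole release: instead of re-reading earlier document text with `stack.startOf` and a regular expression (the `parentElement`/`tagStartExpr` helpers removed below), the 0.13.3 tokenizers consult a `ContextTracker` value on the parse stack that already records the names of the open elements. The snippet below is a minimal stand-alone sketch of that bookkeeping, not the lezer API itself; the real implementation is the `elementContext` tracker added in the `dist/index.es.js` and `tokens.js` diffs further down.

```js
// Simplified model of the bookkeeping done by the new elementContext tracker:
// a persistent linked list of open element names, extended when a StartTag
// token is shifted and popped when an Element node is reduced.
// (In the real tokenizer the name comes from tagNameAfter(input, pos).)
class ElementContext {
  constructor(name, parent) {
    this.name = name;     // lower-cased tag name of the open element
    this.parent = parent; // enclosing ElementContext, or null at the top level
  }
}

const shiftStartTag = (context, name) => new ElementContext(name, context);
const reduceElement = (context) => (context ? context.parent : context);

// What the new tagStart tokenizer does for a close tag: walk the chain of
// open elements instead of scanning back through the input with a regex.
function matchesOpenElement(context, name) {
  for (let cx = context; cx; cx = cx.parent) if (cx.name == name) return true;
  return false;
}

// Example: after <table><tr><td> the context chain is td -> tr -> table.
let cx = null;
for (const name of ["table", "tr", "td"]) cx = shiftStartTag(cx, name);
console.log(cx.name);                         // "td" (innermost open element)
console.log(matchesOpenElement(cx, "table")); // true  -> "</table>" is a StartCloseTag
console.log(matchesOpenElement(cx, "div"));   // false -> MismatchedStartCloseTag
cx = reduceElement(cx);                       // the <td> element is closed
console.log(cx.name);                         // "tr"
```

Because the open-element names now live on the parse stack, mismatched or implicitly closed tags can be classified by following a few parent links rather than re-reading the input buffer, which is the optimization the changelog refers to.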


dist/index.es.js

@@ -1,6 +0,5 @@

-import { ExternalTokenizer, Parser, NodeProp } from 'lezer';
+import { ContextTracker, ExternalTokenizer, Parser, NodeProp } from 'lezer';
// This file was generated by lezer-generator. You probably shouldn't edit it.
-const
-StartTag = 1,
+const StartTag = 1,
StartCloseTag = 2,

@@ -14,3 +13,2 @@ MismatchedStartCloseTag = 3,

OpenTag = 11,
-SelfClosingTag = 20,
RawText = 25,

@@ -65,56 +63,65 @@ Dialect_noMatch = 0;

+let cachedName = null, cachedInput = null, cachedPos = 0;
+function tagNameAfter(input, pos) {
+if (cachedPos == pos && cachedInput == input) return cachedName
+let next = input.get(pos);
+while (isSpace(next)) next = input.get(++pos);
+let start = pos;
+while (nameChar(next)) next = input.get(++pos);
+// Undefined to signal there's a <? or <!, null for just missing
+cachedInput = input; cachedPos = pos;
+return cachedName = pos > start ? input.read(start, pos).toLowerCase() : next == question || next == bang ? undefined : null
+}
const lessThan = 60, greaterThan = 62, slash = 47, question = 63, bang = 33;
-const tagStartExpr = /^<\s*([\.\-\:\w\xa1-\uffff]+)/;
+function ElementContext(name, parent) {
+this.name = name;
+this.parent = parent;
+this.hash = parent ? parent.hash : 0;
+for (let i = 0; i < name.length; i++) this.hash += (this.hash << 4) + name.charCodeAt(i) + (name.charCodeAt(i) << 8);
+}
-let elementQuery = [Element], openAt = 0;
+const elementContext = new ContextTracker({
+start: null,
+shift(context, term, input, stack) {
+return term == StartTag ? new ElementContext(tagNameAfter(input, stack.pos) || "", context) : context
+},
+reduce(context, term) {
+return term == Element && context ? context.parent : context
+},
+reuse(context, node, input, stack) {
+let type = node.type.id;
+return type == StartTag || type == OpenTag
+? new ElementContext(tagNameAfter(input, stack.pos - node.length + 1) || "", context) : context
+},
+// Always returns 0 to avoid interfering with reuse. May not be safe
+// but I haven't found a counterexample yet.
+hash() { return 0 }
+});
-function parentElement(input, stack, pos, len) {
-openAt = stack.startOf(elementQuery, pos);
-if (openAt == null) return null
-let match = tagStartExpr.exec(input.read(openAt, openAt + len + 10));
-return match ? match[1].toLowerCase() : ""
-}
const tagStart = new ExternalTokenizer((input, token, stack) => {
-let pos = token.start, first = input.get(pos);
-// End of file, just close anything
-if (first < 0) {
-let contextStart = stack.startOf(elementQuery);
-let match = contextStart == null ? null : tagStartExpr.exec(input.read(contextStart, contextStart + 30));
-if (match && implicitlyClosed[match[1].toLowerCase()]) token.accept(missingCloseTag, token.start);
-}
+let pos = token.start, first = input.get(pos), close;
+// End of file, close any open tags
+if (first < 0 && stack.context) token.accept(missingCloseTag, token.start);
if (first != lessThan) return
pos++;
-let close = false, tokEnd = pos;
-for (let next; next = input.get(pos);) {
-if (next == slash && !close) { close = true; pos++; tokEnd = pos; }
-else if (next == question || next == bang) return
-else if (isSpace(next)) pos++;
-else break
-}
-let nameStart = pos;
-while (nameChar(input.get(pos))) pos++;
-if (pos == nameStart) return token.accept(close ? IncompleteCloseTag : StartTag, tokEnd)
+if (close = (input.get(pos) == slash)) pos++;
+let name = tagNameAfter(input, pos);
+if (name === undefined) return
+if (!name) return token.accept(close ? IncompleteCloseTag : StartTag, pos)
-let name = input.read(nameStart, pos).toLowerCase();
-let parent = parentElement(input, stack, stack.pos, name.length);
+let parent = stack.context ? stack.context.name : null;
if (close) {
-if (name == parent) return token.accept(StartCloseTag, tokEnd)
-if (implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
-if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, tokEnd)
-while (parent != null) {
-parent = parentElement(input, stack, openAt - 1, name.length);
-if (parent == name) return
-}
-token.accept(MismatchedStartCloseTag, tokEnd);
+if (name == parent) return token.accept(StartCloseTag, pos)
+if (parent && implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
+if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, pos)
+for (let cx = stack.context; cx; cx = cx.parent) if (cx.name == name) return
+token.accept(MismatchedStartCloseTag, pos);
} else {
-if (parent && closeOnOpen[parent] && closeOnOpen[parent][name])
-return token.accept(missingCloseTag, token.start)
-token.accept(StartTag, tokEnd);
+if (parent && closeOnOpen[parent] && closeOnOpen[parent][name]) token.accept(missingCloseTag, token.start);
+else token.accept(StartTag, pos);
}
-}, {contextual: true});
+});
-const tagQuery = [OpenTag, SelfClosingTag];
const selfClosed = new ExternalTokenizer((input, token, stack) => {

@@ -128,8 +135,6 @@ let next = input.get(token.start), end = token.start + 1;

}
-let from = stack.startOf(tagQuery);
-let match = from == null ? null : tagStartExpr.exec(input.read(from, token.start));
-if (match && selfClosers[match[1].toLowerCase()]) token.accept(SelfCloseEndTag, end);
-}, {contextual: true});
+if (stack.context && selfClosers[stack.context.name]) token.accept(SelfCloseEndTag, end);
+});
-const commentContent$1 = new ExternalTokenizer((input, token, stack) => {
+const commentContent$1 = new ExternalTokenizer((input, token) => {
let pos = token.start, endPos = 0;

@@ -218,2 +223,3 @@ for (;;) {

maxTerm: 44,
+context: elementContext,
nodeProps: [

@@ -220,0 +226,0 @@ [NodeProp.closedBy, -2,1,2,"EndTag SelfCloseEndTag",11,"CloseTag"],

{
"name": "lezer-html",
"version": "0.13.2",
"version": "0.13.3",
"description": "lezer-based HTML grammar",

@@ -17,3 +17,3 @@ "main": "dist/index.cjs",

"lezer-javascript": "^0.13.0",
"lezer-generator": "^0.13.0",
"lezer-generator": "^0.13.3",
"mocha": "^8.1.3",

@@ -24,3 +24,3 @@ "rollup": "^2.27.1",

"dependencies": {
"lezer": "^0.13.0"
"lezer": "^0.13.2"
},

@@ -27,0 +27,0 @@ "repository": {

// This file was generated by lezer-generator. You probably shouldn't edit it.
import {Parser} from "lezer"
-import {tagStart, selfClosed, commentContent} from "./tokens.js"
+import {tagStart, selfClosed, commentContent, elementContext} from "./tokens.js"
import {elementContent} from "./content.js"

@@ -13,2 +13,3 @@ import {NodeProp} from "lezer"

maxTerm: 44,
+context: elementContext,
nodeProps: [

@@ -15,0 +16,0 @@ [NodeProp.closedBy, -2,1,2,"EndTag SelfCloseEndTag",11,"CloseTag"],

/* Hand-written tokenizers for HTML. */
-import {ExternalTokenizer} from "lezer"
+import {ExternalTokenizer, ContextTracker} from "lezer"
import {StartTag, StartCloseTag, MismatchedStartCloseTag, missingCloseTag,
-SelfCloseEndTag, IncompleteCloseTag, Element, OpenTag, SelfClosingTag,
+SelfCloseEndTag, IncompleteCloseTag, Element, OpenTag,
Dialect_noMatch, commentContent as cmntContent} from "./parser.terms.js"

@@ -52,56 +52,65 @@

+let cachedName = null, cachedInput = null, cachedPos = 0
+function tagNameAfter(input, pos) {
+if (cachedPos == pos && cachedInput == input) return cachedName
+let next = input.get(pos)
+while (isSpace(next)) next = input.get(++pos)
+let start = pos
+while (nameChar(next)) next = input.get(++pos)
+// Undefined to signal there's a <? or <!, null for just missing
+cachedInput = input; cachedPos = pos
+return cachedName = pos > start ? input.read(start, pos).toLowerCase() : next == question || next == bang ? undefined : null
+}
const lessThan = 60, greaterThan = 62, slash = 47, question = 63, bang = 33
-const tagStartExpr = /^<\s*([\.\-\:\w\xa1-\uffff]+)/
+function ElementContext(name, parent) {
+this.name = name
+this.parent = parent
+this.hash = parent ? parent.hash : 0
+for (let i = 0; i < name.length; i++) this.hash += (this.hash << 4) + name.charCodeAt(i) + (name.charCodeAt(i) << 8)
+}
-let elementQuery = [Element], openAt = 0
+export const elementContext = new ContextTracker({
+start: null,
+shift(context, term, input, stack) {
+return term == StartTag ? new ElementContext(tagNameAfter(input, stack.pos) || "", context) : context
+},
+reduce(context, term) {
+return term == Element && context ? context.parent : context
+},
+reuse(context, node, input, stack) {
+let type = node.type.id
+return type == StartTag || type == OpenTag
+? new ElementContext(tagNameAfter(input, stack.pos - node.length + 1) || "", context) : context
+},
+// Always returns 0 to avoid interfering with reuse. May not be safe
+// but I haven't found a counterexample yet.
+hash() { return 0 }
+})
-function parentElement(input, stack, pos, len) {
-openAt = stack.startOf(elementQuery, pos)
-if (openAt == null) return null
-let match = tagStartExpr.exec(input.read(openAt, openAt + len + 10))
-return match ? match[1].toLowerCase() : ""
-}
export const tagStart = new ExternalTokenizer((input, token, stack) => {
-let pos = token.start, first = input.get(pos)
-// End of file, just close anything
-if (first < 0) {
-let contextStart = stack.startOf(elementQuery)
-let match = contextStart == null ? null : tagStartExpr.exec(input.read(contextStart, contextStart + 30))
-if (match && implicitlyClosed[match[1].toLowerCase()]) token.accept(missingCloseTag, token.start)
-}
+let pos = token.start, first = input.get(pos), close
+// End of file, close any open tags
+if (first < 0 && stack.context) token.accept(missingCloseTag, token.start)
if (first != lessThan) return
pos++
-let close = false, tokEnd = pos
-for (let next; next = input.get(pos);) {
-if (next == slash && !close) { close = true; pos++; tokEnd = pos }
-else if (next == question || next == bang) return
-else if (isSpace(next)) pos++
-else break
-}
-let nameStart = pos
-while (nameChar(input.get(pos))) pos++
-if (pos == nameStart) return token.accept(close ? IncompleteCloseTag : StartTag, tokEnd)
+if (close = (input.get(pos) == slash)) pos++
+let name = tagNameAfter(input, pos)
+if (name === undefined) return
+if (!name) return token.accept(close ? IncompleteCloseTag : StartTag, pos)
-let name = input.read(nameStart, pos).toLowerCase()
-let parent = parentElement(input, stack, stack.pos, name.length)
+let parent = stack.context ? stack.context.name : null
if (close) {
-if (name == parent) return token.accept(StartCloseTag, tokEnd)
-if (implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
-if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, tokEnd)
-while (parent != null) {
-parent = parentElement(input, stack, openAt - 1, name.length)
-if (parent == name) return
-}
-token.accept(MismatchedStartCloseTag, tokEnd)
+if (name == parent) return token.accept(StartCloseTag, pos)
+if (parent && implicitlyClosed[parent]) return token.accept(missingCloseTag, token.start)
+if (stack.dialectEnabled(Dialect_noMatch)) return token.accept(StartCloseTag, pos)
+for (let cx = stack.context; cx; cx = cx.parent) if (cx.name == name) return
+token.accept(MismatchedStartCloseTag, pos)
} else {
-if (parent && closeOnOpen[parent] && closeOnOpen[parent][name])
-return token.accept(missingCloseTag, token.start)
-token.accept(StartTag, tokEnd)
+if (parent && closeOnOpen[parent] && closeOnOpen[parent][name]) token.accept(missingCloseTag, token.start)
+else token.accept(StartTag, pos)
}
-}, {contextual: true})
+})
-const tagQuery = [OpenTag, SelfClosingTag]
export const selfClosed = new ExternalTokenizer((input, token, stack) => {

@@ -115,8 +124,6 @@ let next = input.get(token.start), end = token.start + 1

}
-let from = stack.startOf(tagQuery)
-let match = from == null ? null : tagStartExpr.exec(input.read(from, token.start))
-if (match && selfClosers[match[1].toLowerCase()]) token.accept(SelfCloseEndTag, end)
-}, {contextual: true})
+if (stack.context && selfClosers[stack.context.name]) token.accept(SelfCloseEndTag, end)
+})
-export const commentContent = new ExternalTokenizer((input, token, stack) => {
+export const commentContent = new ExternalTokenizer((input, token) => {
let pos = token.start, endPos = 0

@@ -123,0 +130,0 @@ for (;;) {

@@ -39,3 +39,3 @@ # Doesn't parse VB as JS

Element(OpenTag(StartTag,TagName,EndTag),
-Script(ExpressionStatement(null)),⚠),⚠))
+Script(ExpressionStatement(null)))))

@@ -42,0 +42,0 @@ # Error in JS

@@ -118,3 +118,3 @@ # Regular tag

-Document(Element(OpenTag(StartTag,TagName,EndTag),MismatchedCloseTag(StartCloseTag,TagName,EndTag),⚠))
+Document(Element(OpenTag(StartTag,TagName,EndTag),MismatchedCloseTag(StartCloseTag,TagName,EndTag)))

@@ -127,3 +127,3 @@ # Unclosed tag

-Document(Element(OpenTag(StartTag,TagName,EndTag),⚠))
+Document(Element(OpenTag(StartTag,TagName,EndTag)))

@@ -283,4 +283,4 @@ # Ignore pseudo-xml self-closers

foo=bar
->hi<
-/body
+>hi</
+body
>

@@ -287,0 +287,0 @@

