tiny-html-lexer
Advanced tools
Comparing version 0.8.3 to 0.8.4
@@ -43,3 +43,3 @@ "use strict" | ||
const CHARREF_HEX = '&#[xX][0-9A-Fa-f]+;?' | ||
const CHARREF_NAME = '&[A-Za-z][A-Za-z0-9]*;?' | ||
const CHARREF_NAMED = '&[A-Za-z][A-Za-z0-9]*;?' | ||
const ATTNAME = '.[^>/\t\n\f =]*' /* '[^>/\t\n\f ][^>/\t\n\f =]*' */ | ||
@@ -49,2 +49,9 @@ const ATT_UNQUOT = '[^&>\t\n\f ]+' | ||
// The below generated by preprocessing the list of named character references; | ||
// Legacy charrefs may occur without terminating semicolon but not as a prefix | ||
// of a known named reference. | ||
// Semicolon-terminated named references whose name extends one of the legacy
// names (e.g. `&notin;` extends `&not`, `&ltimes;` extends `&lt`). The grammar
// tries this pattern before CHARREF_LEGACY so that the longer name wins.
// `ltimes` and `gtlPar` are spelled out explicitly: folding them into the
// `ltr…` / `[gl]trPar` groups would match the non-existent `ltrmes` / `gtrPar`
// and miss the real names.
const CHARREF_CONTD = '&(?:copysr|centerdot|divideontimes|[gl]t(?:quest|dot|cir|cc)|gtlPar|ltrPar|gtr(?:dot|less|eqqless|eqless|approx|arr|sim)|ltr(?:i|if|ie)|ltimes|ltlarr|lthree|notin(?:dot|E|v[abc])?|notni(?:v[abc])?|parallel|times(?:bar|d|b));'
const CHARREF_LEGACY = '&(?:[AEIOUYaeiouy]?acute|[AEIOUaeiou](?:grave|circ|uml)|y?uml|[ANOano]tilde|[Aa]ring|[Oo]slash|[Cc]?cedil|brvbar|curren|divide|frac(?:12|14|34)|iquest|middot|plusmn|(?:AE|ae|sz)lig|[lr]aquo|iexcl|micro|pound|THORN|thorn|times|COPY|copy|cent|macr|nbsp|ord[fm]|para|QUOT|quot|sect|sup[123]|AMP|amp|ETH|eth|REG|reg|deg|not|shy|yen|GT|gt|LT|lt)' | ||
const T = tokenTypes | ||
@@ -78,3 +85,5 @@ const grammar = | ||
{ if: CHARREF_HEX, emit: T.charRefHex, goto: context }, | ||
{ if: CHARREF_NAME, emit: T.charRefNamed, goto: context }, | ||
{ if: CHARREF_CONTD, emit: T.charRefNamed, goto: context }, | ||
{ if: CHARREF_LEGACY, emit: legacyCharRefT, goto: context }, // TODO special case in attribute | ||
{ if: CHARREF_NAMED, emit: T.charRefNamed, goto: context }, | ||
{ if: '&', emit: T.unescaped, goto: context }], | ||
@@ -153,6 +162,8 @@ | ||
function PrivateState () { | ||
function PrivateState (input) { | ||
this.content = 'data' // one of { data, rcdata, rawtext, unquoted, doubleQuoted, singleQuoted } | ||
this.context = 'data' // likewise | ||
this.tagName // the last seen 'startTag-start' name | ||
this.position = 0 | ||
this.input = input | ||
} | ||
@@ -164,3 +175,3 @@ | ||
this.content = tagName in content_map ? content_map[tagName] : 'data' | ||
return 'beforeAtt' | ||
return 'beforeAtt' | ||
} | ||
@@ -176,2 +187,16 @@ | ||
// From the spec; | ||
// "If the character reference was consumed as part of an attribute, | ||
// and the last character matched is not a U+003B SEMICOLON character (;), | ||
// and the next input character is either a U+003D EQUALS SIGN character (=) or an ASCII alphanumeric, | ||
// then, for historical reasons, flush code points consumed as a character reference and switch to the return state." | ||
// Chooses the token type for a legacy named character reference that was
// matched without its terminating semicolon.
// Reads `this.context`, `this.input` and `this.position` from the state object
// it is invoked on.
// NOTE(review): assumes `this.position` indexes the character immediately
// after the matched reference - confirm against the lexer driver.
// Returns T.attributeValueData when inside an attribute value and the next
// character is `=` or an ASCII alphanumeric; otherwise T.charRefNamed.
function legacyCharRefT () {
  const context = this.context
  const inAttributeValue =
    context === 'unquoted' || context === 'doubleQuoted' || context === 'singleQuoted'
  if (!inAttributeValue) {
    return T.charRefNamed
  }
  const next = this.input[this.position]
  // At end of input `next` is undefined; RegExp#test would coerce it to the
  // string "undefined" and spuriously match a letter, so check the type first.
  if (typeof next === 'string' && /[a-zA-Z0-9=]/.test(next)) {
    return T.attributeValueData
  }
  return T.charRefNamed
}
function maybeEndTagT (_, chunk) { | ||
@@ -237,3 +262,3 @@ if (chunk.substr (2) === this.tagName) { | ||
function tokenize (input) { | ||
const custom = new CustomState () | ||
const custom = new CustomState (input) | ||
let symbol = start | ||
@@ -240,0 +265,0 @@ , state = states [symbol] |
{ | ||
"name": "tiny-html-lexer", | ||
"version": "0.8.3", | ||
"version": "0.8.4", | ||
"description": "A tiny HTML5 lexer", | ||
@@ -5,0 +5,0 @@ "main": "lib/index.js", |
@@ -46,3 +46,3 @@ A tiny HTML5 lexer | ||
- `"attribute-name"` | ||
- `"attribute-equals"` | ||
- `"attribute-assign"` | ||
- `"attribute-value-start"` | ||
@@ -81,2 +81,20 @@ - `"attribute-value-data"` | ||
Changelog | ||
------------ | ||
### 0.8.4 | ||
- Correct handling of legacy (unterminated) named character references. | ||
### 0.8.3 | ||
- Added typescript annotations. | ||
- Token type `attribute-equals` has been renamed to `attribute-assign`. | ||
- Renamed export `tokens` to `tokenTypes`. | ||
### 0.8.1 | ||
- Fix for incorrect parsing of slashes between attributes. | ||
### 0.8.0 | ||
- First public release. | ||
Some implementation details | ||
@@ -83,0 +101,0 @@ --------------------------- |
@@ -10,2 +10,4 @@ | ||
, 'charref: decimal non-terminated n in data' | ||
, 'charref: special <input value=asda&not(></input>' | ||
, 'charref: special <input value=asda&not-></input>' | ||
, 'charref: special <input value=asda&not*=c></input>' | ||
@@ -17,2 +19,4 @@ , 'charref: special <input value=asda&not=c></input>' | ||
, 'charref: non-special <input value=asda&notin;=c></input>' | ||
, 'charref: special &not(' | ||
, 'charref: special &not-' | ||
, 'charref: special &not*=c in data' | ||
@@ -19,0 +23,0 @@ , 'charref: special &not=c in data' |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package
667
119
0
29126
12