Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

tiny-html-lexer

Package Overview
Dependencies
Maintainers
1
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tiny-html-lexer - npm Package Compare versions

Comparing version 0.8.3 to 0.8.4

35

lib/tiny-lexer.js

@@ -43,3 +43,3 @@ "use strict"

const CHARREF_HEX = '&#[xX][0-9A-Fa-f]+;?'
const CHARREF_NAME = '&[A-Za-z][A-Za-z0-9]*;?'
const CHARREF_NAMED = '&[A-Za-z][A-Za-z0-9]*;?'
const ATTNAME = '.[^>/\t\n\f =]*' /* '[^>/\t\n\f ][^>/\t\n\f =]*' */

@@ -49,2 +49,9 @@ const ATT_UNQUOT = '[^&>\t\n\f ]+'

// The below generated by preprocessing the list of named character references;
// Legacy charrefs may occur without terminating semicolon but not as a prefix
// of a known named reference.
const CHARREF_CONTD = '&(?:copysr|centerdot|divideontimes|[gl]t(?:quest|dot|cir|cc)|[gl]trPar|gtr(?:dot|less|eqqless|eqless|approx|arr|sim)|ltr(?:i|if|ie|mes)|ltlarr|lthree|notin(?:dot|E|v[abc])?|notni(?:v[abc])?|parallel|times(?:bar|d|b));'
const CHARREF_LEGACY = '&(?:[AEIOUYaeiouy]?acute|[AEIOUaeiou](?:grave|circ|uml)|y?uml|[ANOano]tilde|[Aa]ring|[Oo]slash|[Cc]?cedil|brvbar|curren|divide|frac(?:12|14|34)|iquest|middot|plusmn|(?:AE|ae|sz)lig|[lr]aquo|iexcl|micro|pound|THORN|thorn|times|COPY|copy|cent|macr|nbsp|ord[fm]|para|QUOT|quot|sect|sup[123]|AMP|amp|ETH|eth|REG|reg|deg|not|shy|yen|GT|gt|LT|lt)'
const T = tokenTypes

@@ -78,3 +85,5 @@ const grammar =

{ if: CHARREF_HEX, emit: T.charRefHex, goto: context },
{ if: CHARREF_NAME, emit: T.charRefNamed, goto: context },
{ if: CHARREF_CONTD, emit: T.charRefNamed, goto: context },
{ if: CHARREF_LEGACY, emit: legacyCharRefT, goto: context }, // TODO special case in attribute
{ if: CHARREF_NAMED, emit: T.charRefNamed, goto: context },
{ if: '&', emit: T.unescaped, goto: context }],

@@ -153,6 +162,8 @@

function PrivateState () {
function PrivateState (input) {
this.content = 'data' // one of { data, rcdata, rawtext, unquoted, doubleQuoted, singleQuoted }
this.context = 'data' // likewise
this.tagName // the last seen 'startTag-start' name
this.position = 0
this.input = input
}

@@ -164,3 +175,3 @@

this.content = tagName in content_map ? content_map[tagName] : 'data'
return 'beforeAtt'
return 'beforeAtt'
}

@@ -176,2 +187,16 @@

// From the spec:
// "If the character reference was consumed as part of an attribute,
// and the last character matched is not a U+003B SEMICOLON character (;),
// and the next input character is either a U+003D EQUALS SIGN character (=) or an ASCII alphanumeric,
// then, for historical reasons, flush code points consumed as a character reference and switch to the return state."
//
// Chooses the token type to emit for a legacy (semicolon-less) named
// character reference. Called with `this` bound to the lexer's private state;
// it reads `this.context`, `this.input` and `this.position` and mutates nothing.
// NOTE(review): assumes `this.position` indexes the character that follows the
// matched reference at the time this runs -- confirm against the tokenizer loop.
//
// Returns T.attributeValueData when the context is an attribute value
// ('unquoted', 'doubleQuoted' or 'singleQuoted') and the character at
// `this.position` is '=' or an ASCII alphanumeric; otherwise T.charRefNamed.
function legacyCharRefT () {
  const context = this.context
  const next = this.input[this.position]
  const inAttributeValue =
    context === 'unquoted' || context === 'doubleQuoted' || context === 'singleQuoted'
  // At end of input `next` is undefined. RegExp#test would coerce that to the
  // string "undefined" and report a match, so insist on a real character first.
  if (inAttributeValue && typeof next === 'string' && /[a-zA-Z0-9=]/.test(next)) {
    return T.attributeValueData
  }
  return T.charRefNamed
}
function maybeEndTagT (_, chunk) {

@@ -237,3 +262,3 @@ if (chunk.substr (2) === this.tagName) {

function tokenize (input) {
const custom = new CustomState ()
const custom = new CustomState (input)
let symbol = start

@@ -240,0 +265,0 @@ , state = states [symbol]

2

package.json
{
"name": "tiny-html-lexer",
"version": "0.8.3",
"version": "0.8.4",
"description": "A tiny HTML5 lexer",

@@ -5,0 +5,0 @@ "main": "lib/index.js",

@@ -46,3 +46,3 @@ A tiny HTML5 lexer

- `"attribute-name"`
- `"attribute-equals"`
- `"attribute-assign"`
- `"attribute-value-start"`

@@ -81,2 +81,20 @@ - `"attribute-value-data"`

Changelog
------------
### 0.8.4
- Correct handling of legacy (unterminated) named character references.
### 0.8.3
- Added typescript annotations.
- Token type `attribute-equals` has been renamed to `attribute-assign`.
- Renamed export `tokens` to `tokenTypes`.
### 0.8.1
- Fix for incorrect parsing of slashes between attributes.
### 0.8.0
- First public release.
Some implementation details

@@ -83,0 +101,0 @@ ---------------------------

@@ -10,2 +10,4 @@

, 'charref: decimal non-terminated &#110 in data'
, 'charref: special <input value=asda&not(></input>'
, 'charref: special <input value=asda&not-></input>'
, 'charref: special <input value=asda&not*=c></input>'

@@ -17,2 +19,4 @@ , 'charref: special <input value=asda&not=c></input>'

, 'charref: non-special <input value=asda&notin;=c></input>'
, 'charref: special &not('
, 'charref: special &not-'
, 'charref: special &not*=c in data'

@@ -19,0 +23,0 @@ , 'charref: special &not=c in data'

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc