Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

tiny-html-lexer

Package Overview
Dependencies
Maintainers
1
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tiny-html-lexer — npm package: compare versions

Comparing version 0.8.1 to 0.8.3

lib/index.d.ts

4

lib/index.js

@@ -1,2 +0,2 @@

module.exports = { chunks:require ('./tiny-lexer') }
const lexer = require('./tiny-lexer')
module.exports = { chunks:lexer.tokenize, tokenTypes: lexer.tokenTypes }
"use strict"
module.exports = tokenize
const log = console.log.bind (console)

@@ -10,25 +10,27 @@

const T_att_name = 'attribute-name'
, T_att_equals = 'attribute-equals'
, T_att_value_start = 'attribute-value-start'
, T_att_value_data = 'attribute-value-data'
, T_att_value_end = 'attribute-value-end'
, T_comment_start = 'comment-start'
, T_comment_start_bogus = 'comment-start-bogus'
, T_comment_data = 'comment-data'
, T_comment_end = 'comment-end'
, T_comment_end_bogus = 'comment-end-bogus'
, T_startTag_start = 'startTag-start'
, T_endTag_start = 'endTag-start'
, T_tag_end = 'tag-end'
, T_tag_end_close = 'tag-end-autoclose'
, T_charRef_decimal = 'charRef-decimal'
, T_charRef_hex = 'charRef-hex'
, T_charRef_named = 'charRef-named'
, T_unescaped = 'unescaped'
, T_space = 'space'
, T_data = 'data'
, T_rcdata = 'rcdata'
, T_rawtext = 'rawtext'
, T_plaintext = 'plaintext'
const tokenTypes = {
attributeName: 'attribute-name',
attributeAssign: 'attribute-assign',
attributeValueStart: 'attribute-value-start',
attributeValueData : 'attribute-value-data',
attributeValueEnd: 'attribute-value-end',
commentStart: 'comment-start',
commentStartBogus: 'comment-start-bogus',
commentData: 'comment-data',
commentEnd: 'comment-end',
commentEndBogus: 'comment-end-bogus',
startTagStart: 'startTag-start',
endTagStart: 'endTag-start',
tagEnd: 'tag-end',
tagEndClose: 'tag-end-autoclose',
charRefDecimal: 'charRef-decimal',
charRefHex: 'charRef-hex',
charRefNamed: 'charRef-named',
unescaped: 'unescaped',
space: 'space',
data: 'data',
rcdata: 'rcdata',
rawtext: 'rawtext',
plaintext: 'plaintext'
}

@@ -47,85 +49,85 @@

const T = tokenTypes
const grammar =
{ data: [
{ if: STARTTAG_START, emit: T_startTag_start, goto: startTag },
{ if: ENDTAG_START, emit: T_endTag_start, goto:'beforeAtt' },
//{ if: DOCTYPE_START, emit: T_doctype_start, goto:'beforeName' }, // before doctype name
{ if: '<!--', emit: T_comment_start, goto:'commentStart' },
{ if: '<[/!?]', emit: T_comment_start_bogus,goto:'bogusComment' },
{ if: '[^<&]+', emit: T_data },
{ if: '<', emit: T_unescaped },
{ emit: T_data, goto: charRefIn }],
{ if: STARTTAG_START, emit: T.startTagStart, goto: startTag },
{ if: ENDTAG_START, emit: T.endTagStart, goto:'beforeAtt' },
//{ if: DOCTYPE_START, emit: T.doctype_start, goto:'beforeName' }, // before doctype name
{ if: '<!--', emit: T.commentStart, goto:'commentStart' },
{ if: '<[/!?]', emit: T.commentStartBogus, goto:'bogusComment' },
{ if: '[^<&]+', emit: T.data },
{ if: '<', emit: T.unescaped },
{ emit: T.data, goto: charRefIn }],
rawtext: [
{ if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
{ if: '.[^<]*', emit: T_rawtext }],
{ if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
{ if: '.[^<]*', emit: T.rawtext }],
rcdata: [
{ if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
{ if: '<', emit: T_unescaped },
{ if: '[^<&]+', emit: T_rcdata },
{ emit: T_rcdata, goto: charRefIn }],
{ if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
{ if: '<', emit: T.unescaped },
{ if: '[^<&]+', emit: T.rcdata },
{ emit: T.rcdata, goto: charRefIn }],
plaintext: [
{ if:'.+', emit: T_plaintext }],
{ if:'.+', emit: T.plaintext }],
charRef: [
{ if: CHARREF_DEC, emit: T_charRef_decimal, goto: context },
{ if: CHARREF_HEX, emit: T_charRef_hex, goto: context },
{ if: CHARREF_NAME, emit: T_charRef_named, goto: context },
{ if: '&', emit: T_unescaped, goto: context }],
{ if: CHARREF_DEC, emit: T.charRefDecimal, goto: context },
{ if: CHARREF_HEX, emit: T.charRefHex, goto: context },
{ if: CHARREF_NAME, emit: T.charRefNamed, goto: context },
{ if: '&', emit: T.unescaped, goto: context }],
beforeAtt: [
{ if: '>', emit: T_tag_end, goto: content },
{ if: '/>', emit: T_tag_end_close, goto: content },
{ if: '[\t\n\f ]+', emit: T_space, },
{ if: '/+(?!>)', emit: T_space, }, // TODO, test / check with spec
{ if: ATTNAME, emit: T_att_name, goto:'afterAttName' }],
{ if: '>', emit: T.tagEnd, goto: content },
{ if: '/>', emit: T.tagEndClose, goto: content },
{ if: '[\t\n\f ]+', emit: T.space, },
{ if: '/+(?!>)', emit: T.space, }, // TODO, test / check with spec
{ if: ATTNAME, emit: T.attributeName, goto:'afterAttName' }],
afterAttName: [
{ if: '>', emit: T_tag_end, goto: content },
{ if: '/>', emit: T_tag_end_close, goto: content },
{ if: '=[\t\n\f ]*', emit: T_att_equals, goto:'attValue' },
{ if: '/+(?!>)', emit: T_space, goto:'beforeAtt' },
{ if: '[\t\n\f ]+', emit: T_space },
{ if: ATTNAME, emit: T_att_name }],
{ if: '>', emit: T.tagEnd, goto: content },
{ if: '/>', emit: T.tagEndClose, goto: content },
{ if: '=[\t\n\f ]*', emit: T.attributeAssign, goto:'attValue' },
{ if: '/+(?!>)', emit: T.space, goto:'beforeAtt' },
{ if: '[\t\n\f ]+', emit: T.space },
{ if: ATTNAME, emit: T.attributeName }],
attValue: [ // 'equals' has eaten all the space
{ if: '>' , emit: T_tag_end, goto: content },
{ if: '"' , emit: T_att_value_start, goto:'doubleQuoted' },
{ if: "'" , emit: T_att_value_start, goto:'singleQuoted' },
{ emit: T_att_value_start, goto:'unquoted' }],
{ if: '>' , emit: T.tagEnd, goto: content },
{ if: '"' , emit: T.attributeValueStart, goto:'doubleQuoted' },
{ if: "'" , emit: T.attributeValueStart, goto:'singleQuoted' },
{ emit: T.attributeValueStart, goto:'unquoted' }],
unquoted: [
{ if: ATT_UNQUOT, emit: T_att_value_data },
{ if: '(?=[>\t\n\f ])', emit: T_att_value_end, goto:'beforeAtt' },
{ emit: T_att_value_data, goto: charRefIn }],
{ if: ATT_UNQUOT, emit: T.attributeValueData },
{ if: '(?=[>\t\n\f ])', emit: T.attributeValueEnd, goto:'beforeAtt' },
{ emit: T.attributeValueData, goto: charRefIn }],
doubleQuoted: [
{ if: '[^"&]+', emit: T_att_value_data },
{ if: '"', emit: T_att_value_end, goto:'beforeAtt' },
{ emit: T_att_value_data, goto: charRefIn }],
{ if: '[^"&]+', emit: T.attributeValueData },
{ if: '"', emit: T.attributeValueEnd, goto:'beforeAtt' },
{ emit: T.attributeValueData, goto: charRefIn }],
singleQuoted: [
{ if: "[^'&]+", emit: T_att_value_data },
{ if: "'", emit: T_att_value_end, goto:'beforeAtt' },
{ emit: T_att_value_data, goto: charRefIn }],
{ if: "[^'&]+", emit: T.attributeValueData },
{ if: "'", emit: T.attributeValueEnd, goto:'beforeAtt' },
{ emit: T.attributeValueData, goto: charRefIn }],
bogusComment: [
{ if: '[^>]+', emit: T_comment_data, goto:'bogusComment' },
{ if: '>', emit: T_comment_end_bogus, goto: content }],
{ if: '[^>]+', emit: T.commentData, goto:'bogusComment' },
{ if: '>', emit: T.commentEndBogus, goto: content }],
commentStart: [
{ if: '-?>', emit: T_comment_end, goto: content },
{ if: '--!?>', emit: T_comment_end, goto: content },
{ if: '--!', emit: T_comment_data, goto:'comment' },
{ if: '--?', emit: T_comment_data, goto:'comment' },
{ if: '[^>-][^-]*', emit: T_comment_data, goto:'comment' }],
{ if: '-?>', emit: T.commentEnd, goto: content },
{ if: '--!?>', emit: T.commentEnd, goto: content },
{ if: '--!', emit: T.commentData, goto:'comment' },
{ if: '--?', emit: T.commentData, goto:'comment' },
{ if: '[^>-][^-]*', emit: T.commentData, goto:'comment' }],
comment: [
{ if: '--!?>', emit: T_comment_end, goto: content },
{ if: '--!' , emit: T_comment_data },
{ if: '--?' , emit: T_comment_data },
{ if: '[^-]+', emit: T_comment_data }]
{ if: '--!?>', emit: T.commentEnd, goto: content },
{ if: '--!' , emit: T.commentData },
{ if: '--?' , emit: T.commentData },
{ if: '[^-]+', emit: T.commentData }]
}

@@ -150,4 +152,5 @@

function CustomState () {
function PrivateState () {
this.content = 'data' // one of { data, rcdata, rawtext, unquoted, doubleQuoted, singleQuoted }
this.context = 'data' // likewise
this.tagName // the last seen 'startTag-start' name

@@ -174,3 +177,4 @@ }

this.content = 'data'
return T_endTag_start }
return T.endTagStart
}
else return this.content // TODO careful, this is a token type, not a state!

@@ -182,3 +186,3 @@ }

this.content = 'data'
return 'beforeAtt'
return 'beforeAtt'
}

@@ -238,3 +242,3 @@ else return symbol

const self = { value: null, done: false, next: next, state: custom }
self [Symbol.iterator] = function () { return self }
self [Symbol.iterator] = function () { return self } // TODO: decide on API
return self

@@ -309,3 +313,3 @@

const chunker = new TinyLexer (grammar, 'data', CustomState)
const chunker = new TinyLexer (grammar, 'data', PrivateState)

@@ -316,1 +320,5 @@ function tokenize (input) {

// Exports
module.exports.tokenize = tokenize
module.exports.tokenTypes = tokenTypes
{
"name": "tiny-html-lexer",
"version": "0.8.1",
"version": "0.8.3",
"description": "A tiny HTML5 lexer",

@@ -5,0 +5,0 @@ "main": "lib/index.js",

"use strict"
const tokenize = require ('../lib/tiny-lexer')
const tokenize = require ('../lib/tiny-lexer').tokenize
, data = require ('./data/samples')

@@ -5,0 +5,0 @@ , { head, renderTokens, flush, flatten } = require ('./templates')

Sorry, the diff of this file is not supported yet

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc