tiny-html-lexer
Comparing version 0.8.1 to 0.8.3
lib/index.js

@@ -1,2 +1,2 @@
- module.exports = { chunks:require ('./tiny-lexer') }
+ const lexer = require('./tiny-lexer')
+ module.exports = { chunks:lexer.tokenize, tokenTypes: lexer.tokenTypes }
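With this change the package entry point exposes both the tokenizer and the new token-type map. A minimal usage sketch of the 0.8.3 surface follows; it assumes the package is required under its npm name, and since the exact shape of each yielded token is not visible in this diff, the loop simply logs whatever the lexer yields:

// Sketch only: the 0.8.3 entry point re-exports the lexer shown below.
const { chunks, tokenTypes } = require ('tiny-html-lexer')

console.log (Object.keys (tokenTypes)) // 'attributeName', 'startTagStart', ...

for (const token of chunks ('<a href="foo">Hi</a>'))
  console.log (token)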
"use strict" | ||
module.exports = tokenize | ||
const log = console.log.bind (console) | ||
@@ -10,25 +10,27 @@
- const T_att_name = 'attribute-name'
- , T_att_equals = 'attribute-equals'
- , T_att_value_start = 'attribute-value-start'
- , T_att_value_data = 'attribute-value-data'
- , T_att_value_end = 'attribute-value-end'
- , T_comment_start = 'comment-start'
- , T_comment_start_bogus = 'comment-start-bogus'
- , T_comment_data = 'comment-data'
- , T_comment_end = 'comment-end'
- , T_comment_end_bogus = 'comment-end-bogus'
- , T_startTag_start = 'startTag-start'
- , T_endTag_start = 'endTag-start'
- , T_tag_end = 'tag-end'
- , T_tag_end_close = 'tag-end-autoclose'
- , T_charRef_decimal = 'charRef-decimal'
- , T_charRef_hex = 'charRef-hex'
- , T_charRef_named = 'charRef-named'
- , T_unescaped = 'unescaped'
- , T_space = 'space'
- , T_data = 'data'
- , T_rcdata = 'rcdata'
- , T_rawtext = 'rawtext'
- , T_plaintext = 'plaintext'
+ const tokenTypes = {
+ attributeName: 'attribute-name',
+ attributeAssign: 'attribute-assign',
+ attributeValueStart: 'attribute-value-start',
+ attributeValueData : 'attribute-value-data',
+ attributeValueEnd: 'attribute-value-end',
+ commentStart: 'comment-start',
+ commentStartBogus: 'comment-start-bogus',
+ commentData: 'comment-data',
+ commentEnd: 'comment-end',
+ commentEndBogus: 'comment-end-bogus',
+ startTagStart: 'startTag-start',
+ endTagStart: 'endTag-start',
+ tagEnd: 'tag-end',
+ tagEndClose: 'tag-end-autoclose',
+ charRefDecimal: 'charRef-decimal',
+ charRefHex: 'charRef-hex',
+ charRefNamed: 'charRef-named',
+ unescaped: 'unescaped',
+ space: 'space',
+ data: 'data',
+ rcdata: 'rcdata',
+ rawtext: 'rawtext',
+ plaintext: 'plaintext'
+ }
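Replacing the internal T_* constants with an exported tokenTypes object means downstream code can refer to token types by name instead of repeating the string literals. A hedged consumer-side sketch; the describe helper and the idea of classifying a bare type string are illustrative only, not part of the package:

// Illustrative only: maps a token-type string back to a description.
const { tokenTypes: T } = require ('tiny-html-lexer')

function describe (type) {
  switch (type) {
    case T.startTagStart: return 'start of a start tag'
    case T.endTagStart:   return 'start of an end tag'
    case T.commentData:   return 'comment text'
    case T.data:          return 'plain character data'
    default:              return 'other: ' + type
  }
}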
@@ -47,85 +49,85 @@
+ const T = tokenTypes
const grammar =
{ data: [
- { if: STARTTAG_START, emit: T_startTag_start, goto: startTag },
- { if: ENDTAG_START, emit: T_endTag_start, goto:'beforeAtt' },
- //{ if: DOCTYPE_START, emit: T_doctype_start, goto:'beforeName' }, // before doctype name
- { if: '<!--', emit: T_comment_start, goto:'commentStart' },
- { if: '<[/!?]', emit: T_comment_start_bogus, goto:'bogusComment' },
- { if: '[^<&]+', emit: T_data },
- { if: '<', emit: T_unescaped },
- { emit: T_data, goto: charRefIn }],
+ { if: STARTTAG_START, emit: T.startTagStart, goto: startTag },
+ { if: ENDTAG_START, emit: T.endTagStart, goto:'beforeAtt' },
+ //{ if: DOCTYPE_START, emit: T.doctype_start, goto:'beforeName' }, // before doctype name
+ { if: '<!--', emit: T.commentStart, goto:'commentStart' },
+ { if: '<[/!?]', emit: T.commentStartBogus, goto:'bogusComment' },
+ { if: '[^<&]+', emit: T.data },
+ { if: '<', emit: T.unescaped },
+ { emit: T.data, goto: charRefIn }],
rawtext: [
- { if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
- { if: '.[^<]*', emit: T_rawtext }],
+ { if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
+ { if: '.[^<]*', emit: T.rawtext }],
rcdata: [
- { if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
- { if: '<', emit: T_unescaped },
- { if: '[^<&]+', emit: T_rcdata },
- { emit: T_rcdata, goto: charRefIn }],
+ { if: ENDTAG_START, emit: maybeEndTagT, goto: maybeEndTag },
+ { if: '<', emit: T.unescaped },
+ { if: '[^<&]+', emit: T.rcdata },
+ { emit: T.rcdata, goto: charRefIn }],
plaintext: [
- { if:'.+', emit: T_plaintext }],
+ { if:'.+', emit: T.plaintext }],
charRef: [
- { if: CHARREF_DEC, emit: T_charRef_decimal, goto: context },
- { if: CHARREF_HEX, emit: T_charRef_hex, goto: context },
- { if: CHARREF_NAME, emit: T_charRef_named, goto: context },
- { if: '&', emit: T_unescaped, goto: context }],
+ { if: CHARREF_DEC, emit: T.charRefDecimal, goto: context },
+ { if: CHARREF_HEX, emit: T.charRefHex, goto: context },
+ { if: CHARREF_NAME, emit: T.charRefNamed, goto: context },
+ { if: '&', emit: T.unescaped, goto: context }],
beforeAtt: [
- { if: '>', emit: T_tag_end, goto: content },
- { if: '/>', emit: T_tag_end_close, goto: content },
- { if: '[\t\n\f ]+', emit: T_space, },
- { if: '/+(?!>)', emit: T_space, }, // TODO, test / check with spec
- { if: ATTNAME, emit: T_att_name, goto:'afterAttName' }],
+ { if: '>', emit: T.tagEnd, goto: content },
+ { if: '/>', emit: T.tagEndClose, goto: content },
+ { if: '[\t\n\f ]+', emit: T.space, },
+ { if: '/+(?!>)', emit: T.space, }, // TODO, test / check with spec
+ { if: ATTNAME, emit: T.attributeName, goto:'afterAttName' }],
afterAttName: [
- { if: '>', emit: T_tag_end, goto: content },
- { if: '/>', emit: T_tag_end_close, goto: content },
- { if: '=[\t\n\f ]*', emit: T_att_equals, goto:'attValue' },
- { if: '/+(?!>)', emit: T_space, goto:'beforeAtt' },
- { if: '[\t\n\f ]+', emit: T_space },
- { if: ATTNAME, emit: T_att_name }],
+ { if: '>', emit: T.tagEnd, goto: content },
+ { if: '/>', emit: T.tagEndClose, goto: content },
+ { if: '=[\t\n\f ]*', emit: T.attributeAssign, goto:'attValue' },
+ { if: '/+(?!>)', emit: T.space, goto:'beforeAtt' },
+ { if: '[\t\n\f ]+', emit: T.space },
+ { if: ATTNAME, emit: T.attributeName }],
attValue: [ // 'equals' has eaten all the space
- { if: '>' , emit: T_tag_end, goto: content },
- { if: '"' , emit: T_att_value_start, goto:'doubleQuoted' },
- { if: "'" , emit: T_att_value_start, goto:'singleQuoted' },
- { emit: T_att_value_start, goto:'unquoted' }],
+ { if: '>' , emit: T.tagEnd, goto: content },
+ { if: '"' , emit: T.attributeValueStart, goto:'doubleQuoted' },
+ { if: "'" , emit: T.attributeValueStart, goto:'singleQuoted' },
+ { emit: T.attributeValueStart, goto:'unquoted' }],
unquoted: [
- { if: ATT_UNQUOT, emit: T_att_value_data },
- { if: '(?=[>\t\n\f ])', emit: T_att_value_end, goto:'beforeAtt' },
- { emit: T_att_value_data, goto: charRefIn }],
+ { if: ATT_UNQUOT, emit: T.attributeValueData },
+ { if: '(?=[>\t\n\f ])', emit: T.attributeValueEnd, goto:'beforeAtt' },
+ { emit: T.attributeValueData, goto: charRefIn }],
doubleQuoted: [
- { if: '[^"&]+', emit: T_att_value_data },
- { if: '"', emit: T_att_value_end, goto:'beforeAtt' },
- { emit: T_att_value_data, goto: charRefIn }],
+ { if: '[^"&]+', emit: T.attributeValueData },
+ { if: '"', emit: T.attributeValueEnd, goto:'beforeAtt' },
+ { emit: T.attributeValueData, goto: charRefIn }],
singleQuoted: [
- { if: "[^'&]+", emit: T_att_value_data },
- { if: "'", emit: T_att_value_end, goto:'beforeAtt' },
- { emit: T_att_value_data, goto: charRefIn }],
+ { if: "[^'&]+", emit: T.attributeValueData },
+ { if: "'", emit: T.attributeValueEnd, goto:'beforeAtt' },
+ { emit: T.attributeValueData, goto: charRefIn }],
bogusComment: [
- { if: '[^>]+', emit: T_comment_data, goto:'bogusComment' },
- { if: '>', emit: T_comment_end_bogus, goto: content }],
+ { if: '[^>]+', emit: T.commentData, goto:'bogusComment' },
+ { if: '>', emit: T.commentEndBogus, goto: content }],
commentStart: [
- { if: '-?>', emit: T_comment_end, goto: content },
- { if: '--!?>', emit: T_comment_end, goto: content },
- { if: '--!', emit: T_comment_data, goto:'comment' },
- { if: '--?', emit: T_comment_data, goto:'comment' },
- { if: '[^>-][^-]*', emit: T_comment_data, goto:'comment' }],
+ { if: '-?>', emit: T.commentEnd, goto: content },
+ { if: '--!?>', emit: T.commentEnd, goto: content },
+ { if: '--!', emit: T.commentData, goto:'comment' },
+ { if: '--?', emit: T.commentData, goto:'comment' },
+ { if: '[^>-][^-]*', emit: T.commentData, goto:'comment' }],
comment: [
- { if: '--!?>', emit: T_comment_end, goto: content },
- { if: '--!' , emit: T_comment_data },
- { if: '--?' , emit: T_comment_data },
- { if: '[^-]+', emit: T_comment_data }]
+ { if: '--!?>', emit: T.commentEnd, goto: content },
+ { if: '--!' , emit: T.commentData },
+ { if: '--?' , emit: T.commentData },
+ { if: '[^-]+', emit: T.commentData }]
}
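The rule format above (an `if` pattern, an `emit` token type or function, and a `goto` state or function) is what the TinyLexer constructor consumes further down. The package's own TinyLexer implementation is not part of this diff; what follows is only a rough sketch of how such a table can be interpreted, under the assumptions that `if` holds a regular-expression source, that rules without `if` are zero-width fallbacks, that function-valued `emit`/`goto` entries are called with the private state as `this`, and that tokens come out as [type, chunk] pairs:

// Sketch of a table-driven lexer step loop (NOT the package's TinyLexer).
function makeLexer (grammar, startState, PrivateState) {
  return function* tokenizeSketch (input) {
    const custom = new PrivateState ()
    let state = startState
    let pos = 0
    while (pos < input.length) {
      let advanced = false
      for (const rule of grammar [state]) {
        let chunk
        if (rule.if == null) chunk = ''            // fallback rule: consumes nothing
        else {
          const re = new RegExp (rule.if, 'sy')    // sticky match at the current position
          re.lastIndex = pos
          const m = re.exec (input)
          if (!m) continue                         // try the next rule in this state
          chunk = m [0]
          pos += chunk.length
        }
        // emit and goto may be plain strings or functions of the matched chunk
        const type = typeof rule.emit === 'function' ? rule.emit.call (custom, chunk) : rule.emit
        yield [type, chunk]
        if (rule.goto != null)
          state = typeof rule.goto === 'function' ? rule.goto.call (custom, chunk) : rule.goto
        advanced = true
        break
      }
      if (!advanced) throw new SyntaxError ('no rule matched in state ' + state)
    }
  }
}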
@@ -150,4 +152,5 @@
- function CustomState () {
+ function PrivateState () {
this.content = 'data' // one of { data, rcdata, rawtext, unquoted, doubleQuoted, singleQuoted }
this.context = 'data' // likewise
this.tagName // the last seen 'startTag-start' name
@@ -174,3 +177,4 @@ }
this.content = 'data'
- return T_endTag_start }
+ return T.endTagStart
+ }
else return this.content // TODO careful, this is a token type, not a state!
@@ -182,3 +186,3 @@ }
this.content = 'data'
- return 'beforeAtt'
+ return 'beforeAtt'
}
@@ -238,3 +242,3 @@ else return symbol
const self = { value: null, done: false, next: next, state: custom }
- self [Symbol.iterator] = function () { return self }
+ self [Symbol.iterator] = function () { return self } // TODO: decide on API
return self
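The `self [Symbol.iterator]` line makes the returned object serve as both the iterator (it has `next`) and the iterable (it returns itself), which is what lets callers consume the token stream with a plain for..of loop. A tiny standalone illustration of the same pattern, unrelated to HTML lexing:

// Minimal example of the self-returning iterator pattern used above.
function counter (limit) {
  let i = 0
  const self = { value: null, done: false }
  self.next = function () {
    if (i < limit) { self.value = i++; self.done = false }
    else { self.value = undefined; self.done = true }
    return self // the iterator-result object is the iterator itself
  }
  self [Symbol.iterator] = function () { return self } // iterable and iterator in one
  return self
}

for (const n of counter (3)) console.log (n) // logs 0, 1, 2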
@@ -309,3 +313,3 @@
- const chunker = new TinyLexer (grammar, 'data', CustomState)
+ const chunker = new TinyLexer (grammar, 'data', PrivateState)
@@ -316,1 +320,5 @@ function tokenize (input) {
+ // Exports
+ module.exports.tokenize = tokenize
+ module.exports.tokenTypes = tokenTypes
package.json

{
"name": "tiny-html-lexer",
- "version": "0.8.1",
+ "version": "0.8.3",
"description": "A tiny HTML5 lexer",
@@ -5,0 +5,0 @@ "main": "lib/index.js",
"use strict" | ||
const tokenize = require ('../lib/tiny-lexer') | ||
const tokenize = require ('../lib/tiny-lexer').tokenize | ||
, data = require ('./data/samples') | ||
@@ -5,0 +5,0 @@ , { head, renderTokens, flush, flatten } = require ('./templates') |
The diff of one further file is not supported by the viewer yet and is omitted.
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package