Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

tiny-html-lexer

Package Overview
Dependencies
Maintainers
1
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tiny-html-lexer - npm Package Compare versions

Comparing version 0.8.0 to 0.8.1

10

lib/tiny-lexer.js

@@ -41,6 +41,8 @@ "use strict"

const CHARREF_HEX = '&#[xX][0-9A-Fa-f]+;?'
const CHARREF_NAME = '&[A-Za-z0-9]+;?'
const CHARREF_NAME = '&[A-Za-z][A-Za-z0-9]*;?'
const ATTNAME = '.[^>/\t\n\f =]*' /* '[^>/\t\n\f ][^>/\t\n\f =]*' */
const ATT_UNQUOT = '[^&>\t\n\f ]+'
const DOCTYPE_START = '<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]'
const grammar =

@@ -50,2 +52,3 @@ { data: [

{ if: ENDTAG_START, emit: T_endTag_start, goto:'beforeAtt' },
//{ if: DOCTYPE_START, emit: T_doctype_start, goto:'beforeName' }, // before doctype name
{ if: '<!--', emit: T_comment_start, goto:'commentStart' },

@@ -79,3 +82,4 @@ { if: '<[/!?]', emit: T_comment_start_bogus,goto:'bogusComment' },

{ if: '/>', emit: T_tag_end_close, goto: content },
{ if: '[/\t\n\f ]+', emit: T_space, },
{ if: '[\t\n\f ]+', emit: T_space, },
{ if: '/+(?!>)', emit: T_space, }, // TODO, test / check with spec
{ if: ATTNAME, emit: T_att_name, goto:'afterAttName' }],

@@ -87,3 +91,3 @@

{ if: '=[\t\n\f ]*', emit: T_att_equals, goto:'attValue' },
{ if: '/+', emit: T_space, goto:'beforeAtt' },
{ if: '/+(?!>)', emit: T_space, goto:'beforeAtt' },
{ if: '[\t\n\f ]+', emit: T_space },

@@ -90,0 +94,0 @@ { if: ATTNAME, emit: T_att_name }],

{
"name": "tiny-html-lexer",
"version": "0.8.0",
"version": "0.8.1",
"description": "A tiny HTML5 lexer",

@@ -24,4 +24,12 @@ "main": "lib/index.js",

"type": "git",
"url": "https://github.com/alwinb/tiny-html-lexer"
"url": "git+https://github.com/alwinb/tiny-html-lexer.git"
},
"bugs": {
"url": "https://github.com/alwinb/tiny-html-lexer/issues"
},
"homepage": "https://github.com/alwinb/tiny-html-lexer#readme",
"dependencies": {},
"devDependencies": {
"tagscript": "^0.3.0"
}
}

@@ -21,19 +21,23 @@ A tiny HTML5 lexer

let tinyhtml = require ('tiny-html-lexer')
let stream = tinyhtml.chunks ('<span>Hello, world</span>')
for (let chunk of stream)
console.log (stream)
```javascript
let tinyhtml = require ('tiny-html-lexer')
let stream = tinyhtml.chunks ('<span>Hello, world</span>')
for (let chunk of stream)
console.log (chunk)
```
Alternatively, without `for .. of`:
Alternatively, without `for .. of`
(should work just fine in ES5 environments):
let stream = tinyhtml.chunks ('<span>Hello, world</span>') .next ()
while (!stream.done) {
console.log (stream.value)
stream.next ()
}
```javascript
let stream = tinyhtml.chunks ('<span>Hello, world</span>') .next ()
while (!stream.done) {
console.log (stream.value)
stream.next ()
}
```
Each call to `next ()` mutates and returns the iterator object itself,
rather than the usual separate `{ value, done }` objects. It seems superfluous
to create new wrapper objects `{ value, done }` for each chunk, so I went with
this instead.
to create new wrapper objects for each chunk, so I went with this instead.

@@ -92,2 +96,7 @@ Tokens are tuples (arrays) `[type, chunk]` where type is one of

License
-----------
MIT.
Enjoy!
var samples =
[ 'named &amp; char ref in data'
, 'non-terminated named charref in data char ref in &amp data'
, 'non-terminated named charref in data char ref in &ampa data'
, 'hexadecimal charref in data hexadecimal ref &#xccc; in data'
, 'non-terminated Hexadecimal charref in data hexadecimal ref &#xccc in data'
, 'decimal charref in data decimal ref &#1092; in data'
, 'non-terminated Decimal charref in data decimal ref &#110 in data'
, 'special charref <input value=asda&not*=c></input>'
, 'special charref <input value=asda&not=c></input>'
, 'special charref <input value="asda&notit; I tell you"></input>'
, 'non-special charref <input value=asda&notin*=c></input>'
, 'non-special charref <input value=asda&notin=c></input>'
, 'non-special charref <input value=asda&notin;=c></input>'
, 'special charref in data &not*=c'
, 'special charref in data &not=c'
, 'special charref in data &notit; I tell you'
, 'non-special charref in data charref &notin*=c'
, 'non-special charref in data charref &notin=c'
, 'non-special charref in data charref &notin;=c'
, 'named charref in attribute <input value="you &amp; me"/> and more'
, 'named charref in attribute <input value=\'you &amp; me\'/> and more'
, 'named charref in attribute <input value=you&#12me /> and more'
, 'named charref in attribute <input value=&amp;me /> and more'
, 'named charref in attribute <input value=&amp attr=val /> and more'
, 'named charref in attribute <input value=&ampo attr=val /> and more'
, 'bogus charref in attribute <input value="you &# am me"/> and more'
, 'bogus charref in attribute <input value=\'you &# amp me\'/> and more'
, 'bogus charref in attribute <input value=you&x ampme /> and more'
[ 'charref: named &amp; in data'
, 'charref: named non-terminated &amp in data'
, 'charref: named non-terminated &ampa in data'
, 'charref: hexadecimal &#xccc; in data'
, 'charref: hexadecimal non-terminated &#xccc in data'
, 'charref: decimal &#1092; in data'
, 'charref: decimal non-terminated &#110 in data'
, 'charref: special <input value=asda&not*=c></input>'
, 'charref: special <input value=asda&not=c></input>'
, 'charref: special <input value="asda&notit; I tell you"></input>'
, 'charref: non-special <input value=asda&notin*=c></input>'
, 'charref: non-special <input value=asda&notin=c></input>'
, 'charref: non-special <input value=asda&notin;=c></input>'
, 'charref: special &not*=c in data'
, 'charref: special &not=c in data'
, 'charref: special &notit; I tell you, in data'
, 'charref: special &notin; I tell you, in data'
, 'charref: non-special &notin*=c in data'
, 'charref: non-special &notin=c in data'
, 'charref: non-special &notin;=c in data'
, 'charref: named <input value="you &amp; me"/> in attribute'
, 'charref: named <input value=\'you &amp; me\'/> in attribute'
, 'charref: named <input value=you&#12me /> in attribute'
, 'charref: named <input value=&amp;me /> in attribute'
, 'charref: named <input value=&amp attr=val /> in attribute'
, 'charref: named <input value=&ampo attr=val /> in attribute'
, 'charref: bogus <input value="you &# am me"/> in attribute'
, 'charref: bogus <input value=\'you &# amp me\'/> in attribute'
, 'charref: bogus <input value=you&x ampme /> in attribute'
, 'charref: ampHash &amp;# such'
, ''
, 'rcdata <textarea> asdf & &amp; <textareaNot </textarea> and more'

@@ -44,23 +48,34 @@ , 'rcdata2 <textarea> asdf & &amp; </textarea( and not ending> it'

, 'bad end tag <div style=color:blue> This is blue </ div> And this too!'
, 'comment1 comment <!-- with -> within --> and subsequent data'
, 'comment2 comment <!-- with bogus end -> and subsequent data'
, 'comment3 <!-- Comment with -- double dash within --> and subsequent data'
, 'comment4 <!-- Comment with --!- weird stuff within --> and subsequent data'
, 'comment5 <!-- Comment with strange end --!> and subsequent data'
, 'comment6 <!--!> and such'
, 'comment7 <!--> and such'
, 'comment8 <!-> and such'
, 'comment9 <!---!> and such'
, 'comment10 <!----!> and such'
, 'closePlaintext hi <plaintext>asd<as &ap, </plaintext> cannot be ended'
, ''
, 'comment: <!weird markup declaration> and such'
, 'comment: <!> and such'
, 'comment: <?> and such'
, 'comment: </> and such'
, 'comment: <!-> and such'
, 'comment: <?-> and such'
, 'comment: <!-> and such'
, 'comment: <!--> and such'
, 'comment: <?--> and such'
, 'comment: <!--> and such'
, 'comment: <!--!> and such'
, 'comment: <!--> and such'
, 'comment: <!-> and such'
, 'comment: <!---!> and such'
, 'comment: <!----!> and such'
, 'comment: <!-- with -> within --> and subsequent data'
, 'comment: <!-- with bogus end -> and subsequent data'
, 'comment: <!-- Comment with -- double dash within --> and subsequent data'
, 'comment: <!-- Comment with --!- weird stuff within --> and subsequent data'
, 'comment: <!-- Comment with strange end --!> and subsequent data'
, 'bogus comment: <! with end !@> and subsequent data'
, 'bogus comment: </ with end !@> and subsequent data'
, 'bogus comment: <? with end !@> and subsequent data'
, 'bogus comment: <!- with end -> and subsequent data'
, ''
, 'missing space attribues connected <div name="a"name="b" >'
, 'nonalpha attribute weird template tag <div {name="a" name="b" >'
, 'bogus1 bogus comment <! with end !@> and subsequent data'
, 'bogus2 bogus comment </ with end !@> and subsequent data'
, 'bogus3 bogus comment <? with end !@> and subsequent data'
, 'bogus4 bogus comment <!- with end -> and subsequent data'
, 'bogus5 An empty bogus comment <!>'
, 'invalidMD <!weird markup declaration>'
, 'normalHtml This is <span class = "s1">html</span> Yeah!'
, 'autocloseAttempt This is <span / attr >html</span> Yeah!'
, 'closePlaintext hi <plaintext>asd<as &ap, </plaintext> cannot be ended'
, 'unescaped ampersand data & such'

@@ -70,7 +85,21 @@ , 'unescaped ampersand Hash data &# such'

, 'unescaped ampersand HashExZed data &#xz such'
, 'hexDigits data &#xa such'
, 'decimal charref data &#1 such'
, 'named charref data &name such'
, 'ampHash data &amp;# such'
, 'slashes <span/////name////=/blabla>'
, ''
, 'slashes: <span/>'
, 'slashes: <span name=foo//>'
, 'slashes: <div//>'
, 'slashes: <div/foo/bar//>'
, 'slashes: <span//>'
, 'slashes: <span />'
, 'slashes: <span <>'
, 'slashes: <span //>'
, 'slashes: <span / />'
, 'slashes: <span/////>'
, 'slashes: <span/////name////=/blabla>'
, 'slashes: <span / attr >foo bar</span>'
, 'slashes: <span name=/ >asdf'
, 'slashes: <span name=/>asdf'
, 'slashes: <span name=// />asdf'
, 'slashes: <span name= / />asdf'
, 'weirdEquals <span attr = / asd >content</span>'

@@ -80,5 +109,2 @@ , 'weirdEquals2 <span attr = @ asd >content</span>'

, 'weirdEquals4 <span attr @= asd >content</span>'
, 'dangerousSlash1 <span name=/ >asdf'
, 'dangerousSlash2 <span name=/>asdf'
, 'dangerousSlash3 <span name= / />asdf'
, 'missingValue <span name=>asdf'

@@ -85,0 +111,0 @@ , 'invalidAttributeValue1 <div class= =at >'

"use strict"
const walk = require ('./walk')
, { tag, end, render } = require ('./tagscript')
const { tag, end, renderTo } = require ('tagscript')
const log = console.log.bind (console)
module.exports = { head, renderTokens, flush }
module.exports = { head, renderTokens, flush, flatten }
function head (cssfile) { return function (contents) {

@@ -37,6 +36,2 @@ const header =

//
const log = console.log.bind (console)
function map (fn) { return function* (obj) {

@@ -49,4 +44,3 @@ for (let a of obj) yield fn (a)

try {
for (let a of obj)
process.stdout.write (render (a))
renderTo (process.stdout, obj)
process.exit (205)

@@ -58,17 +52,1 @@ }

}
// Generator: runs `walk` over `obj` using `iterables` as the classifier
// and yields the `value` of every visited node whose `tag` is 'leaf'.
// Nodes with any other tag are skipped.
function* flatten (obj) {
  const nodes = walk (obj, iterables)
  for (const node of nodes) {
    if (node.tag !== 'leaf') continue
    yield node.value
  }
}
// Classifier passed to `walk`: decides whether a value is wrapped with
// `walk.leaf` or `walk.shape`.
//  - null / undefined                               -> walk.leaf
//  - anything exposing a Symbol.iterator function   -> walk.shape
//  - Array instances (even without an iterator)     -> walk.shape
//  - everything else                                -> walk.leaf
function iterables (obj) {
  if (obj == null) return walk.leaf (obj)

  const hasIterator = typeof obj[Symbol.iterator] === 'function'
  if (hasIterator || obj instanceof Array) return walk.shape (obj)

  return walk.leaf (obj)
}
"use strict"
const tokenize = require ('../lib/tiny-lexer')
, data = require ('./data/html')
, data = require ('./data/samples')
, { head, renderTokens, flush, flatten } = require ('./templates')

@@ -25,3 +25,3 @@

let samples = data.samples.concat (data.EOFSamples)
compose (flush, flatten, head ('file://'+__dirname+'/style/tokens.css?ao'), map (renderTokens), map (tokenize)) (samples)
compose (flush, head ('file://'+__dirname+'/style/tokens.css?ao'), map (renderTokens), map (tokenize)) (samples)
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc