tiny-html-lexer
Advanced tools
Comparing version 0.8.0 to 0.8.1
@@ -41,6 +41,8 @@ "use strict" | ||
const CHARREF_HEX = '&#[xX][0-9A-Fa-f]+;?' | ||
const CHARREF_NAME = '&[A-Za-z0-9]+;?' | ||
const CHARREF_NAME = '&[A-Za-z][A-Za-z0-9]*;?' | ||
const ATTNAME = '.[^>/\t\n\f =]*' /* '[^>/\t\n\f ][^>/\t\n\f =]*' */ | ||
const ATT_UNQUOT = '[^&>\t\n\f ]+' | ||
const DOCTYPE_START = '<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]' | ||
const grammar = | ||
@@ -50,2 +52,3 @@ { data: [ | ||
{ if: ENDTAG_START, emit: T_endTag_start, goto:'beforeAtt' }, | ||
//{ if: DOCTYPE_START, emit: T_doctype_start, goto:'beforeName' }, // before doctype name | ||
{ if: '<!--', emit: T_comment_start, goto:'commentStart' }, | ||
@@ -79,3 +82,4 @@ { if: '<[/!?]', emit: T_comment_start_bogus,goto:'bogusComment' }, | ||
{ if: '/>', emit: T_tag_end_close, goto: content }, | ||
{ if: '[/\t\n\f ]+', emit: T_space, }, | ||
{ if: '[\t\n\f ]+', emit: T_space, }, | ||
{ if: '/+(?!>)', emit: T_space, }, // TODO, test / check with spec | ||
{ if: ATTNAME, emit: T_att_name, goto:'afterAttName' }], | ||
@@ -87,3 +91,3 @@ | ||
{ if: '=[\t\n\f ]*', emit: T_att_equals, goto:'attValue' }, | ||
{ if: '/+', emit: T_space, goto:'beforeAtt' }, | ||
{ if: '/+(?!>)', emit: T_space, goto:'beforeAtt' }, | ||
{ if: '[\t\n\f ]+', emit: T_space }, | ||
@@ -90,0 +94,0 @@ { if: ATTNAME, emit: T_att_name }], |
{ | ||
"name": "tiny-html-lexer", | ||
"version": "0.8.0", | ||
"version": "0.8.1", | ||
"description": "A tiny HTML5 lexer", | ||
@@ -24,4 +24,12 @@ "main": "lib/index.js", | ||
"type": "git", | ||
"url": "https://github.com/alwinb/tiny-html-lexer" | ||
"url": "git+https://github.com/alwinb/tiny-html-lexer.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/alwinb/tiny-html-lexer/issues" | ||
}, | ||
"homepage": "https://github.com/alwinb/tiny-html-lexer#readme", | ||
"dependencies": {}, | ||
"devDependencies": { | ||
"tagscript": "^0.3.0" | ||
} | ||
} |
@@ -21,19 +21,23 @@ A tiny HTML5 lexer | ||
let tinyhtml = require ('tiny-html-lexer') | ||
let stream = tinyhtml.chunks ('<span>Hello, world</span>') | ||
for (let chunk of stream) | ||
console.log (stream) | ||
```javascript | ||
let tinyhtml = require ('tiny-html-lexer') | ||
let stream = tinyhtml.chunks ('<span>Hello, world</span>') | ||
for (let chunk of stream) | ||
console.log (chunk) | ||
``` | ||
Alternatively, without `for .. of`: | ||
Alternatively, without `for .. of` | ||
(should work just fine in ES5 environments): | ||
let stream = tinyhtml.chunks ('<span>Hello, world</span>') .next () | ||
while (!stream.done) { | ||
console.log (stream.value) | ||
stream.next () | ||
} | ||
```javascript | ||
let stream = tinyhtml.chunks ('<span>Hello, world</span>') .next () | ||
while (!stream.done) { | ||
console.log (stream.value) | ||
stream.next () | ||
} | ||
``` | ||
Each call to `next ()` mutates and returns the iterator object itself, | ||
rather than the usual separate `{ value, done }` objects. It seems superfluous | ||
to create new wrapper objects `{ value, done }` for each chunk, so I went with | ||
this instead. | ||
to create new wrapper objects for each chunk, so I went with this instead. | ||
@@ -92,2 +96,7 @@ Tokens are tuples (arrays) `[type, chunk]` where type is one of | ||
License | ||
----------- | ||
MIT. | ||
Enjoy! |
var samples = | ||
[ 'named & char ref in data' | ||
, 'non-terminated named charref in data char ref in & data' | ||
, 'non-terminated named charref in data char ref in &a data' | ||
, 'hexadecimal charref in data hexadecimal ref ೌ in data' | ||
, 'non-terminated Hexadecimal charref in data hexadecimal ref ೌ in data' | ||
, 'decimal charref in data decimal ref ф in data' | ||
, 'non-terminated Decimal charref in data decimal ref n in data' | ||
, 'special charref <input value=asda¬*=c></input>' | ||
, 'special charref <input value=asda¬=c></input>' | ||
, 'special charref <input value="asda¬it; I tell you"></input>' | ||
, 'non-special charref <input value=asda¬in*=c></input>' | ||
, 'non-special charref <input value=asda¬in=c></input>' | ||
, 'non-special charref <input value=asda∉=c></input>' | ||
, 'special charref in data ¬*=c' | ||
, 'special charref in data ¬=c' | ||
, 'special charref in data ¬it; I tell you' | ||
, 'non-special charref in data charref ¬in*=c' | ||
, 'non-special charref in data charref ¬in=c' | ||
, 'non-special charref in data charref ∉=c' | ||
, 'named charref in attribute <input value="you & me"/> and more' | ||
, 'named charref in attribute <input value=\'you & me\'/> and more' | ||
, 'named charref in attribute <input value=youme /> and more' | ||
, 'named charref in attribute <input value=&me /> and more' | ||
, 'named charref in attribute <input value=& attr=val /> and more' | ||
, 'named charref in attribute <input value=&o attr=val /> and more' | ||
, 'bogus charref in attribute <input value="you &# am me"/> and more' | ||
, 'bogus charref in attribute <input value=\'you &# amp me\'/> and more' | ||
, 'bogus charref in attribute <input value=you&x ampme /> and more' | ||
[ 'charref: named & in data' | ||
, 'charref: named non-terminated & in data' | ||
, 'charref: named non-terminated &a in data' | ||
, 'charref: hexadecimal ೌ in data' | ||
, 'charref: hexadecimal non-terminated ೌ in data' | ||
, 'charref: decimal ф in data' | ||
, 'charref: decimal non-terminated n in data' | ||
, 'charref: special <input value=asda¬*=c></input>' | ||
, 'charref: special <input value=asda¬=c></input>' | ||
, 'charref: special <input value="asda¬it; I tell you"></input>' | ||
, 'charref: non-special <input value=asda¬in*=c></input>' | ||
, 'charref: non-special <input value=asda¬in=c></input>' | ||
, 'charref: non-special <input value=asda∉=c></input>' | ||
, 'charref: special ¬*=c in data' | ||
, 'charref: special ¬=c in data' | ||
, 'charref: special ¬it; I tell you, in data' | ||
, 'charref: special ∉ I tell you, in data' | ||
, 'charref: non-special ¬in*=c in data' | ||
, 'charref: non-special ¬in=c in data' | ||
, 'charref: non-special ∉=c in data' | ||
, 'charref: named <input value="you & me"/> in attribute' | ||
, 'charref: named <input value=\'you & me\'/> in attribute' | ||
, 'charref: named <input value=youme /> in attribute' | ||
, 'charref: named <input value=&me /> in attribute' | ||
, 'charref: named <input value=& attr=val /> in attribute' | ||
, 'charref: named <input value=&o attr=val /> in attribute' | ||
, 'charref: bogus <input value="you &# am me"/> in attribute' | ||
, 'charref: bogus <input value=\'you &# amp me\'/> in attribute' | ||
, 'charref: bogus <input value=you&x ampme /> in attribute' | ||
, 'charref: ampHash &# such' | ||
, '' | ||
, 'rcdata <textarea> asdf & & <textareaNot </textarea> and more' | ||
@@ -44,23 +48,34 @@ , 'rcdata2 <textarea> asdf & & </textarea( and not ending> it' | ||
, 'bad end tag <div style=color:blue> This is blue </ div> And this too!' | ||
, 'comment1 comment <!-- with -> within --> and subsequent data' | ||
, 'comment2 comment <!-- with bogus end -> and subsequent data' | ||
, 'comment3 <!-- Comment with -- double dash within --> and subsequent data' | ||
, 'comment4 <!-- Comment with --!- weird stuff within --> and subsequent data' | ||
, 'comment5 <!-- Comment with strange end --!> and subsequent data' | ||
, 'comment6 <!--!> and such' | ||
, 'comment7 <!--> and such' | ||
, 'comment8 <!-> and such' | ||
, 'comment9 <!---!> and such' | ||
, 'comment10 <!----!> and such' | ||
, 'closePlaintext hi <plaintext>asd<as &ap, </plaintext> cannot be ended' | ||
, '' | ||
, 'comment: <!weird markup declaration> and such' | ||
, 'comment: <!> and such' | ||
, 'comment: <?> and such' | ||
, 'comment: </> and such' | ||
, 'comment: <!-> and such' | ||
, 'comment: <?-> and such' | ||
, 'comment: <!-> and such' | ||
, 'comment: <!--> and such' | ||
, 'comment: <?--> and such' | ||
, 'comment: <!--> and such' | ||
, 'comment: <!--!> and such' | ||
, 'comment: <!--> and such' | ||
, 'comment: <!-> and such' | ||
, 'comment: <!---!> and such' | ||
, 'comment: <!----!> and such' | ||
, 'comment: <!-- with -> within --> and subsequent data' | ||
, 'comment: <!-- with bogus end -> and subsequent data' | ||
, 'comment: <!-- Comment with -- double dash within --> and subsequent data' | ||
, 'comment: <!-- Comment with --!- weird stuff within --> and subsequent data' | ||
, 'comment: <!-- Comment with strange end --!> and subsequent data' | ||
, 'bogus comment: <! with end !@> and subsequent data' | ||
, 'bogus comment: </ with end !@> and subsequent data' | ||
, 'bogus comment: <? with end !@> and subsequent data' | ||
, 'bogus comment: <!- with end -> and subsequent data' | ||
, '' | ||
, 'missing space attribues connected <div name="a"name="b" >' | ||
, 'nonalpha attribute weird template tag <div {name="a" name="b" >' | ||
, 'bogus1 bogus comment <! with end !@> and subsequent data' | ||
, 'bogus2 bogus comment </ with end !@> and subsequent data' | ||
, 'bogus3 bogus comment <? with end !@> and subsequent data' | ||
, 'bogus4 bogus comment <!- with end -> and subsequent data' | ||
, 'bogus5 An empty bogus comment <!>' | ||
, 'invalidMD <!weird markup declaration>' | ||
, 'normalHtml This is <span class = "s1">html</span> Yeah!' | ||
, 'autocloseAttempt This is <span / attr >html</span> Yeah!' | ||
, 'closePlaintext hi <plaintext>asd<as &ap, </plaintext> cannot be ended' | ||
, 'unescaped ampersand data & such' | ||
@@ -70,7 +85,21 @@ , 'unescaped ampersand Hash data &# such' | ||
, 'unescaped ampersand HashExZed data &#xz such' | ||
, 'hexDigits data 
 such' | ||
, 'decimal charref data  such' | ||
, 'named charref data &name such' | ||
, 'ampHash data &# such' | ||
, 'slashes <span/////name////=/blabla>' | ||
, '' | ||
, 'slashes: <span/>' | ||
, 'slashes: <span name=foo//>' | ||
, 'slashes: <div//>' | ||
, 'slashes: <div/foo/bar//>' | ||
, 'slashes: <span//>' | ||
, 'slashes: <span />' | ||
, 'slashes: <span <>' | ||
, 'slashes: <span //>' | ||
, 'slashes: <span / />' | ||
, 'slashes: <span/////>' | ||
, 'slashes: <span/////name////=/blabla>' | ||
, 'slashes: <span / attr >foo bar</span>' | ||
, 'slashes: <span name=/ >asdf' | ||
, 'slashes: <span name=/>asdf' | ||
, 'slashes: <span name=// />asdf' | ||
, 'slashes: <span name= / />asdf' | ||
, 'weirdEquals <span attr = / asd >content</span>' | ||
@@ -80,5 +109,2 @@ , 'weirdEquals2 <span attr = @ asd >content</span>' | ||
, 'weirdEquals4 <span attr @= asd >content</span>' | ||
, 'dangerousSlash1 <span name=/ >asdf' | ||
, 'dangerousSlash2 <span name=/>asdf' | ||
, 'dangerousSlash3 <span name= / />asdf' | ||
, 'missingValue <span name=>asdf' | ||
@@ -85,0 +111,0 @@ , 'invalidAttributeValue1 <div class= =at >' |
"use strict" | ||
const walk = require ('./walk') | ||
, { tag, end, render } = require ('./tagscript') | ||
const { tag, end, renderTo } = require ('tagscript') | ||
const log = console.log.bind (console) | ||
module.exports = { head, renderTokens, flush } | ||
module.exports = { head, renderTokens, flush, flatten } | ||
function head (cssfile) { return function (contents) { | ||
@@ -37,6 +36,2 @@ const header = | ||
// | ||
const log = console.log.bind (console) | ||
function map (fn) { return function* (obj) { | ||
@@ -49,4 +44,3 @@ for (let a of obj) yield fn (a) | ||
try { | ||
for (let a of obj) | ||
process.stdout.write (render (a)) | ||
renderTo (process.stdout, obj) | ||
process.exit (205) | ||
@@ -58,17 +52,1 @@ } | ||
} | ||
function* flatten (obj) { | ||
for (let a of (walk (obj, iterables))) | ||
if (a.tag === 'leaf') yield a.value | ||
} | ||
function iterables (obj) { | ||
return obj == null ? walk.leaf (obj) | ||
: typeof obj[Symbol.iterator] === 'function' ? walk.shape (obj) | ||
: obj instanceof Array ? walk.shape (obj) | ||
: walk.leaf (obj) | ||
} | ||
"use strict" | ||
const tokenize = require ('../lib/tiny-lexer') | ||
, data = require ('./data/html') | ||
, data = require ('./data/samples') | ||
, { head, renderTokens, flush, flatten } = require ('./templates') | ||
@@ -25,3 +25,3 @@ | ||
let samples = data.samples.concat (data.EOFSamples) | ||
compose (flush, flatten, head ('file://'+__dirname+'/style/tokens.css?ao'), map (renderTokens), map (tokenize)) (samples) | ||
compose (flush, head ('file://'+__dirname+'/style/tokens.css?ao'), map (renderTokens), map (tokenize)) (samples) | ||
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
No website
Quality: Package does not have a website.
Found 1 instance in 1 package
0
2
101
25629
1
11
586