Comparing version 1.4.3 to 1.5.0
@@ -17,2 +17,4 @@ // Generated by CoffeeScript 2.5.1 | ||
this./* must NOT set global flag */xmlname_re = RegExp(`[${this.xmlname_re_head.source}][${this.xmlname_re_head.source}${this.xmlname_re_tail.source}]*`, "u"); | ||
this./* must NOT set global flag */mktsname_re_frontanchored = RegExp(`^[${this.xmlname_re_head.source}][${this.xmlname_re_head.source}${this.mktsname_re_tail.source}]*`, "u"); | ||
@@ -22,2 +24,4 @@ | ||
this./* must NOT set global flag */mktsname_re = RegExp(`[${this.xmlname_re_head.source}][${this.xmlname_re_head.source}${this.mktsname_re_tail.source}]*`, "u"); | ||
/* must NOT set global flag */ | ||
@@ -24,0 +28,0 @@ |
121
lib/html.js
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
'use strict'; | ||
var CND, Cupofjoe, DATOM, HtmlParser, MAIN, alert, assign, badge, debug, echo, excluded_content_parts, freeze, help, info, is_frozen, isa, jr, lets, log, new_datom, rpr, select, thaw, type_of, types, urge, validate, warn, whisper, | ||
var CND, Cupofjoe, DATOM, GRAMMAR, LEXER, MAIN, PARSER, alert, assign, badge, debug, echo, excluded_content_parts, freeze, help, info, is_frozen, isa, jr, lets, log, new_datom, rpr, select, thaw, type_of, types, urge, validate, warn, whisper, | ||
indexOf = [].indexOf; | ||
@@ -53,4 +53,2 @@ | ||
//........................................................................................................... | ||
HtmlParser = require('atlas-html-stream'); | ||
({Cupofjoe} = require('cupofjoe')); | ||
@@ -62,2 +60,8 @@ | ||
LEXER = require('./chevrotain-html/chevrotain-lexer'); | ||
PARSER = require('./chevrotain-html/chevrotain-parser'); | ||
GRAMMAR = require('./chevrotain-html/chevrotain-grammar'); | ||
//=========================================================================================================== | ||
@@ -226,14 +230,2 @@ | ||
// #----------------------------------------------------------------------------------------------------------- | ||
// @datoms_as_nlhtml = ( ds... ) -> | ||
// R = '' | ||
// for d in ds.flat Infinity | ||
// html = @_html_from_datoms d | ||
// sigil = d.$key[ 0 ] | ||
// tagname = d.$key[ 1 .. ] | ||
// R += '\n' if sigil is '<' and isa._intertext_html_block_level_tagname tagname | ||
// R += html | ||
// # R += '\n' if | ||
// return R | ||
//----------------------------------------------------------------------------------------------------------- | ||
@@ -248,3 +240,3 @@ this.html_from_datoms = function(...ds) { | ||
d = ref[i]; | ||
results.push(this._html_from_datoms(d)); | ||
results.push(this._html_from_datom(d)); | ||
} | ||
@@ -255,2 +247,3 @@ return results; | ||
/* TAINT ^^^ ??? ^^^ */ | ||
this.$html_from_datoms = function() { | ||
@@ -262,3 +255,3 @@ var $; | ||
if (!isa.list(d)) { | ||
return send(this._html_from_datoms(d)); | ||
return send(this._html_from_datom(d)); | ||
} | ||
@@ -275,8 +268,8 @@ ref = this.html_from_datoms(...d); | ||
//----------------------------------------------------------------------------------------------------------- | ||
this._html_from_datoms = function(d) { | ||
this._html_from_datom = function(d) { | ||
var atxt, i, is_empty_tag, key, len, ref, ref1, ref2, ref3, sigil, slash, src, tagname, value, x_key, x_sys_key; | ||
if (isa.text(d)) { | ||
return this._html_from_datoms((this.text(d))[0]); | ||
return this._html_from_datom((this.text(d))[0]); | ||
} | ||
DATOM.types.validate.datom_datom(d); | ||
/* TAINT ??? */ DATOM.types.validate.datom_datom(d); | ||
atxt = ''; | ||
@@ -435,77 +428,25 @@ sigil = d.$key[0]; | ||
//----------------------------------------------------------------------------------------------------------- | ||
// @new_parse_method = ( settings ) -> | ||
// validate.parse_html_settings settings = { types.defaults.parse_html_settings..., settings..., } | ||
this._new_parse_method = function(settings) { | ||
/* NOTE strangely, throwing an error from inside the `data` handler seems to throw off the parser; | ||
even when both `parser.flushText()` and `parser.reset()` were called prior to throwing the error, all | ||
subsequent parsing calls will return empty lists. We therefore construct a new parser instance for | ||
each call to `datoms_from_html()`. */ | ||
var R, parser; | ||
R = null; | ||
parser = new HtmlParser({ | ||
preserveWS: true | ||
}); | ||
//......................................................................................................... | ||
parser.on('data', ({name, data, text}) => { | ||
var $value, has_keys, is_empty_tag, key, sigil, value; | ||
if (name != null) { | ||
name = name.toLowerCase(); | ||
this.datoms_from_html = function(html, settings) { | ||
var R, defaults, parsification, tokenization; | ||
defaults = { | ||
lexer_mode: 'outside_mode', | ||
parser_start: 'document' | ||
}; | ||
settings = {...defaults, ...settings}; | ||
tokenization = LEXER.tokenize(html, settings.lexer_mode); | ||
parsification = PARSER.parse(tokenization, settings.parser_start); | ||
R = { | ||
source: html, | ||
cst: parsification.cst, | ||
lexer_mode: settings.lexer_mode, | ||
parser_start: settings.parser_start, | ||
errors: { | ||
lexer: tokenization.errors, | ||
parser: parsification.errors | ||
} | ||
//....................................................................................................... | ||
if (name === '!doctype') { | ||
$value = 'html'; | ||
for (key in data) { | ||
$value = key; | ||
break; | ||
} | ||
return R.push(new_datom('^doctype', $value)); | ||
} | ||
if (text != null) { | ||
//....................................................................................................... | ||
return R.push(new_datom('^text', {text})); | ||
} | ||
is_empty_tag = isa._intertext_html_empty_element_tagname(name); | ||
// debug '^7787^', { name, data, text, is_empty_tag, } | ||
if (data == null) { | ||
if (is_empty_tag) { | ||
return; | ||
} | ||
// throw new Error "^intertext/_new_parse_method@6069^ found closing tag, but HTML5 <#{name}> is an empty tag" | ||
return R.push(new_datom('>' + name)); | ||
} | ||
has_keys = false; | ||
for (key in data) { | ||
value = data[key]; | ||
has_keys = true; | ||
if (value === '') { | ||
data[key] = true; | ||
} | ||
} | ||
sigil = is_empty_tag ? '^' : '<'; | ||
if (!has_keys) { | ||
return R.push(new_datom(sigil + name)); | ||
} | ||
return R.push(new_datom(sigil + name, data)); | ||
}); | ||
parser.on('error', function(error) { | ||
throw error; | ||
}); | ||
// parser.on 'end', -> R.push new_datom '^stop' | ||
//......................................................................................................... | ||
return (html) => { | ||
// urge '^7787^', jr html | ||
R = []; | ||
parser.write(html); | ||
parser.flushText(); | ||
// parser.reset() # call if parser is to be reused | ||
return R; | ||
}; | ||
return GRAMMAR.extract_tokens(R); | ||
}; | ||
//----------------------------------------------------------------------------------------------------------- | ||
this.datoms_from_html = function(html) { | ||
return this._new_parse_method()(html); | ||
}; | ||
//----------------------------------------------------------------------------------------------------------- | ||
this.$datoms_from_html = function() { | ||
@@ -512,0 +453,0 @@ var $; |
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
'use strict'; | ||
var CND, FS, Html, Hyph, INTERTEXT, Intertext, MAIN, Mkts, Multimix, PATH, Patterns, Slabs, Tbl, alert, assign, badge, cast, debug, echo, help, info, isa, jr, log, rpr, type_of, urge, validate, warn, whisper; | ||
var CND, FS, Html, Hyph, INTERTEXT, Intertext, MAIN, Mkts, Multimix, PATH, Patterns, Slabs, Tbl, alert, assign, badge, cast, debug, echo, help, info, inspect, isa, jr, log, rpr, type_of, urge, validate, warn, whisper; | ||
@@ -46,2 +46,4 @@ //########################################################################################################### | ||
({inspect} = require('util')); | ||
Html = (function() { | ||
@@ -119,2 +121,16 @@ /* | ||
//----------------------------------------------------------------------------------------------------------- | ||
// Renders every argument with `inspect(x, this.rpr_settings)` and returns the renderings joined by a | ||
// single space; called without arguments it returns the empty string. | ||
this.rpr = function(...P) { | ||
var x; | ||
return ((function() { | ||
var i, len, results; | ||
results = []; | ||
// Collect one rendering per argument, in call order. | ||
for (i = 0, len = P.length; i < len; i++) { | ||
x = P[i]; | ||
results.push(inspect(x, this.rpr_settings)); | ||
} | ||
return results; | ||
// `.call(this)` passes the outer `this` into the helper function so `this.rpr_settings` resolves. | ||
}).call(this)).join(' '); | ||
}; | ||
//=========================================================================================================== | ||
@@ -138,2 +154,8 @@ | ||
this.TBL = new Tbl(); | ||
this.rpr_settings = { | ||
depth: 2e308, | ||
maxArrayLength: 2e308, | ||
breakLength: 2e308, | ||
compact: true | ||
}; | ||
if (target != null) { | ||
@@ -140,0 +162,0 @@ this.export(target); |
@@ -0,1 +1,2 @@ | ||
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
@@ -505,1 +506,3 @@ //########################################################################################################### | ||
}).call(this); | ||
//# sourceMappingURL=tabulate.js.map |
@@ -0,1 +1,2 @@ | ||
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
@@ -719,1 +720,3 @@ 'use strict'; | ||
}).call(this); | ||
//# sourceMappingURL=tabulate.test.js.map |
{ | ||
"name": "intertext", | ||
"version": "1.4.3", | ||
"version": "1.5.0", | ||
"description": "Services for Recurrent Text-related Tasks", | ||
@@ -33,2 +33,3 @@ "main": "lib/main.js", | ||
"atlas-html-stream": "^1.2.0", | ||
"chevrotain": "^6.5.0", | ||
"cnd": "^5.3.0", | ||
@@ -35,0 +36,0 @@ "cupofjoe": "0.0.4", |
@@ -70,5 +70,22 @@ | ||
* InterText HTML uses the most generous definition of an XML name, ever. It basically allows anything except | ||
those few characters that would definitely mess with the rest of the grammar, so tags like `<123>`, | ||
`<foo:bar#baz.gnu bro:go=42>` are totally OK. Consumers are advised to do their own checking to narrow | ||
down available choices or interpret special constructs, as the case may be. A valid InterText HTML name is | ||
any sequence of one or more characters, excluding only | ||
* whitespace, | ||
* brackets (`{[(<>)]}`), | ||
* question and exclamation marks (`!?`), | ||
* slashes (`/`), | ||
* equal signs (`=`), | ||
* and quotes (`'` and `"`). | ||
* Special constructs are not further analyzed ATM; this includes | ||
* Doctypes (e.g. `<!DOCTYPE html>`), | ||
* XML declarations (e.g. `<?xml foo bar?>`), | ||
* Processing Instructions (e.g. `<?what ever?>`). | ||
Observe that tag-like constructs that start with `<?` (left pointy bracket, question mark) but end with a | ||
plain `>` (right pointy bracket) not preceded by a `?` (question mark) are considered ungrammatical (they | ||
are allowed in SGML, though). | ||
### HTML Parsing | ||
@@ -75,0 +92,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
879976
66
6936
12
+ Addedchevrotain@^6.5.0
+ Addedchevrotain@6.5.0(transitive)
+ Addedregexp-to-ast@0.4.0(transitive)