Comparing version 1.6.1 to 2.0.0
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
'use strict'; | ||
var CND, Cupofjoe, DATOM, GRAMMAR, LEXER, MAIN, PARSER, alert, assign, badge, debug, echo, excluded_content_parts, freeze, help, info, is_frozen, isa, jr, lets, log, new_datom, rpr, select, thaw, type_of, types, urge, validate, warn, whisper, | ||
var CND, Cupofjoe, DATOM, MAIN, alert, assign, badge, debug, echo, excluded_content_parts, freeze, help, info, is_frozen, isa, jr, lets, log, new_datom, rpr, select, thaw, type_of, types, urge, validate, warn, whisper, | ||
indexOf = [].indexOf; | ||
@@ -59,8 +59,2 @@ | ||
LEXER = require('./chevrotain-html/chevrotain-lexer'); | ||
PARSER = require('./chevrotain-html/chevrotain-parser'); | ||
GRAMMAR = require('./chevrotain-html/chevrotain-grammar'); | ||
//=========================================================================================================== | ||
@@ -420,42 +414,2 @@ | ||
//=========================================================================================================== | ||
// PARSING | ||
//----------------------------------------------------------------------------------------------------------- | ||
// Tokenize and parse `html`, then return whatever `GRAMMAR.extract_tokens()` derives from the result. | ||
// `settings` is optional; its keys override the defaults `lexer_mode: 'outside_mode'` and | ||
// `parser_start: 'document'`. Lexer and parser errors are not inspected here; they are handed on | ||
// under `errors.lexer` and `errors.parser` of the record given to `GRAMMAR.extract_tokens()`. | ||
// NOTE(review): the return value is presumably a list of datoms (`$datoms_from_html` iterates it | ||
// by index) -- confirm against `./chevrotain-html/chevrotain-grammar`. | ||
this.datoms_from_html = function(html, settings) { | ||
var R, defaults, parsification, tokenization; | ||
defaults = { | ||
lexer_mode: 'outside_mode', | ||
parser_start: 'document' | ||
}; | ||
// Caller-supplied keys win over the defaults; spreading an undefined `settings` adds nothing. | ||
settings = {...defaults, ...settings}; | ||
tokenization = LEXER.tokenize(html, settings.lexer_mode); | ||
parsification = PARSER.parse(tokenization, settings.parser_start); | ||
// Bundle the source text, the parser's `cst`, the effective settings and both error lists. | ||
R = { | ||
source: html, | ||
cst: parsification.cst, | ||
lexer_mode: settings.lexer_mode, | ||
parser_start: settings.parser_start, | ||
errors: { | ||
lexer: tokenization.errors, | ||
parser: parsification.errors | ||
} | ||
}; | ||
return GRAMMAR.extract_tokens(R); | ||
}; | ||
//----------------------------------------------------------------------------------------------------------- | ||
// Return `$( callback )`, where `$` is taken from the `steampipes` export. The callback passes its | ||
// first argument to `this.datoms_from_html()` (no settings, so the defaults apply) and calls | ||
// `send()` once per element of the result, in order; it always returns null. | ||
// NOTE(review): presumably the callback is invoked once per stream item -- confirm against steampipes. | ||
this.$datoms_from_html = function() { | ||
var $; | ||
// `steampipes` is required when this method is called, not when the module is loaded. | ||
({$} = (require('steampipes')).export()); | ||
// Arrow function: `this` is the same object `$datoms_from_html` was called on. | ||
return $((buffer_or_text, send) => { | ||
var d, i, len, ref; | ||
ref = this.datoms_from_html(buffer_or_text); | ||
for (i = 0, len = ref.length; i < len; i++) { | ||
d = ref[i]; | ||
send(d); | ||
} | ||
return null; | ||
}); | ||
}; | ||
//=========================================================================================================== | ||
// CUP OF HTML | ||
@@ -462,0 +416,0 @@ //----------------------------------------------------------------------------------------------------------- |
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
'use strict'; | ||
var CND, FS, Html, Hyph, INTERTEXT, Intertext, MAIN, Mkts, Multimix, PATH, Patterns, Slabs, Tbl, alert, assign, badge, cast, debug, echo, help, info, inspect, isa, jr, log, rpr, type_of, urge, validate, warn, whisper; | ||
var CND, FS, Html, Hyph, INTERTEXT, Intertext, MAIN, Multimix, PATH, Patterns, Slabs, Tbl, alert, assign, badge, cast, debug, echo, help, info, inspect, isa, jr, log, rpr, type_of, urge, validate, warn, whisper; | ||
@@ -65,12 +65,2 @@ //########################################################################################################### | ||
// Build the `Mkts` class: an empty subclass of `Multimix` on which `Mkts.include()` is called with | ||
// the exports of `./mkts`; the IIFE runs with the module's `this` and returns the class. | ||
// NOTE(review): presumably `include()` mixes those exports into the class -- confirm against multimix. | ||
Mkts = (function() { | ||
//----------------------------------------------------------------------------------------------------------- | ||
class Mkts extends Multimix {}; | ||
Mkts.include(require('./mkts')); | ||
return Mkts; | ||
}).call(this); | ||
Hyph = (function() { | ||
@@ -148,3 +138,2 @@ //----------------------------------------------------------------------------------------------------------- | ||
this.HTML = new Html(); | ||
this.MKTS = new Mkts(); | ||
this.HYPH = new Hyph(); | ||
@@ -151,0 +140,0 @@ this.SLABS = new Slabs(); |
@@ -0,1 +1,2 @@ | ||
// Generated by CoffeeScript 2.5.1 | ||
(function() { | ||
@@ -739,1 +740,3 @@ 'use strict'; | ||
}).call(this); | ||
//# sourceMappingURL=tabulate.test.js.map |
{ | ||
"name": "intertext", | ||
"version": "1.6.1", | ||
"version": "2.0.0", | ||
"description": "Services for Recurrent Text-related Tasks", | ||
@@ -32,18 +32,16 @@ "main": "lib/main.js", | ||
"dependencies": { | ||
"atlas-html-stream": "^1.2.0", | ||
"chevrotain": "^6.5.0", | ||
"cnd": "^5.3.1", | ||
"cnd": "^5.3.2", | ||
"cupofjoe": "0.0.4", | ||
"datom": "^4.0.0", | ||
"hyphenopoly": "^4.2.1", | ||
"intertype": "^3.4.0", | ||
"datom": "^5.0.0", | ||
"hyphenopoly": "^4.3.0", | ||
"intertype": "^3.4.1", | ||
"linebreak": "^1.0.2", | ||
"multimix": "^2.2.0", | ||
"multimix": "^2.2.1", | ||
"steampipes": "^6.1.1", | ||
"term-size": "^2.2.0", | ||
"to-width": "^1.0.3" | ||
"to-width": "^1.0.4" | ||
}, | ||
"devDependencies": { | ||
"guy-test": "^1.4.2" | ||
"guy-test": "^2.0.1" | ||
} | ||
} |
@@ -11,8 +11,5 @@ | ||
- [General Considerations](#general-considerations) | ||
- [HTML Parsing](#html-parsing) | ||
- [HTML Generation](#html-generation) | ||
- [HTML Generation from Datoms](#html-generation-from-datoms) | ||
- [HTML Generation from Method Calls](#html-generation-from-method-calls) | ||
- [Example: HTML Parsing and HTML Generation](#example-html-parsing-and-html-generation) | ||
- [Remarks](#remarks) | ||
@@ -27,6 +24,2 @@ <!-- END doctoc generated TOC please keep comment here to allow auto update --> | ||
# HTML parser | ||
html_from_datoms | ||
$html_from_datoms | ||
# HTML generator | ||
@@ -53,79 +46,6 @@ datoms_from_html | ||
* Parsing takes a single text (or a stream of texts) as input and generates a list of (or a stream of) | ||
[datoms](https://github.com/loveencounterflow/datom) as output. | ||
* Conversely, HTML generation works by taking a list (or a stream) of | ||
* HTML generation works by taking a list (or a stream) of | ||
[datoms](https://github.com/loveencounterflow/datom) as input and generating a single text (or a stream of | ||
texts) with tags and properly HTML-escaped text content as output. | ||
* In HTML5 parsing, no errors will be thrown; in principle, any string may be thrown at the parser. However, | ||
(in the future) there may be system-level datoms with warnings interspersed with the output. | ||
* [HTML5 empty tags](https://developer.mozilla.org/en-US/docs/Glossary/empty_element) will be honored: when | ||
parsing HTML, tags like `<br>`, `<img>`, `<hr>` are considered complete without being explicitly closed; | ||
their self-closing versions `<br/>`, `<img/>`, `<hr/>` will be parsed like the unslashed versions, and | ||
their closing counterparts `</br>`, `</img>`, `</hr>` will be silently ignored. | ||
> List of HTML5 empty tags: `area`, `base`, `br`, `col`, `embed`, `hr`, `img`, `input`, `link`, `meta`, | ||
> `param`, `source`, `track`, `wbr`. This list is fixed for now, but may conceivably become configurable | ||
> in the future. | ||
* InterText HTML uses the most generous definition of an XML name, ever. It basically allows anything except | ||
those few characters that would definitely mess with the rest of the grammar, so tags like `<123>`, | ||
`<foo:bar#baz.gnu bro:go=42>` are totally OK. Consumers are advised to do their own checking to narrow | ||
down available choices or interpret special constructs, as the case may be. A valid InterText HTML name is | ||
any sequence of one or more characters, excluding only | ||
* whitespace, | ||
* brackets (`{[(<>)]}`), | ||
* question and exclamation marks (`!?`), | ||
* slashes (`/`), | ||
* equal signs (`=`), | ||
* and quotes (`'` and `"`). | ||
* Special constructs are not further analyzed ATM; this includes | ||
* Doctypes (e.g. `<!DOCTYPE html>`), | ||
* XML declarations (e.g. `<?xml foo bar?>`), | ||
* Processing Instructions (e.g. `<?what ever?>`). | ||
Observe that tag-like constructs that start with `<?` (left pointy bracket, question mark) but end with a | ||
plain `>` (right pointy bracket) not preceded by a `?` (question mark) are considered ungrammatical (they | ||
are allowed in SGML, though). | ||
### HTML Parsing | ||
HTML parsing uses [`atlassubbed/atlas-html-stream`](https://github.com/atlassubbed/atlas-html-stream) to | ||
turn HTML5 texts into series of [datoms](https://github.com/loveencounterflow/datom). Two HTML formats are | ||
supported: | ||
* plain HTML5, and | ||
* MKTScript, a nascent crossbreed of a kind-of MarkDown with HTMLish tags. | ||
Unless you know what you're after you'll probably want to use the plain HTML5 flavor. | ||
After `{ HTML, } = require 'intertext'`, use one of these methods: | ||
* `HTML.html_as_datoms = ( text ) ->` to turn HTML fragments or entire documents into a list of datoms, or | ||
* `HTML.mkts_html_as_datoms = ( text ) ->` to do the same with MKTScript. | ||
Both methods work pretty much the same and are the inverse operations to `HTML.datom_as_html()`: | ||
* All opening tags will be turned into datoms whose `$key` is the tagname prefixed with the left pointy | ||
bracket as sigil, and attribute name/value pairs becoming properties of the datom. | ||
* Closing tags will be turned into datoms whose `$key` is the tagname prefixed with the right pointy bracket | ||
as sigil. | ||
* For plain HTML, 'lone'/'self-closing' tags will be treated like an opening tag immediately followed by a | ||
closing tag. | ||
* For MKTScript, 'lone'/'self-closing' tags will be turned into datoms whose `$key` is the tagname prefixed | ||
with the caret as sigil. | ||
* Intermittent text will be turned into datoms whose `$key` is `^text` and whose contents are stored under | ||
the `text` property. | ||
* Whitespace will be preserved. | ||
In [SteamPipe](https://github.com/loveencounterflow/steampipes) streams, use the transforms returned by | ||
* `$html_as_datoms()` | ||
* `$mkts_html_as_datoms()` | ||
for the same functionality; both transforms accept texts and buffers as inputs. | ||
### HTML Generation | ||
@@ -190,3 +110,3 @@ | ||
<!-- | ||
### Example: HTML Parsing and HTML Generation | ||
@@ -264,2 +184,2 @@ | ||
--> |
# InterText | ||
# 🅘🅝🅣🅔🅡🅣🅔🅧🅣 | ||
@@ -22,6 +22,2 @@ <!-- START doctoc generated TOC please keep comment here to allow auto update --> | ||
* [**InterText HTML**](./README-html.md): parse, generate HTML markup. | ||
* [**InterText MKTScript**](./README-mkts.md): parse, generate MKTScript markup. | ||
* [**InterText HYPH**](./README-hyphenation.md) for hyphenating text in multiple languages (only en-US | ||
@@ -52,3 +48,4 @@ covered so far, but underlying software is multilingual and configurable). | ||
* [X] use `INTERTEXT.rpr()` for tabulation instead of `JSON.stringify()` | ||
* [ ] implement path manipulation, integrate [`pathmap`](https://github.com/jeremyruppel/pathmap) | ||
* [ ] integrate color-related code from [DataMill | ||
colorizer](https://github.com/loveencounterflow/datamill/blob/2d0ca3a784c8f3f9ba8d9fd6277d18c4ee859fb1/src/experiments/colorizer.coffee) |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
10
660657
50
3936
50
+ Added datom@5.1.1 (transitive)
+ Added emittery@0.7.2 (transitive)
+ Added intertype@7.7.1 (transitive)
+ Added multimix@4.1.0 (transitive)
- Removed atlas-html-stream@^1.2.0
- Removed chevrotain@^6.5.0
- Removed atlas-html-stream@1.2.0 (transitive)
- Removed atlas-seq-matcher@1.0.2 (transitive)
- Removed chevrotain@6.5.0 (transitive)
- Removed datom@4.0.0 (transitive)
- Removed emittery@0.6.0 (transitive)
- Removed regexp-to-ast@0.4.0 (transitive)
Updated cnd@^5.3.2
Updated datom@^5.0.0
Updated hyphenopoly@^4.3.0
Updated intertype@^3.4.1
Updated multimix@^2.2.1
Updated to-width@^1.0.4