intertext - npm Package Compare versions

Comparing version 1.6.1 to 2.0.0

lib/html.js

		// Generated by CoffeeScript 2.5.1
		(function() {
		'use strict';
		var CND, Cupofjoe, DATOM, GRAMMAR, LEXER, MAIN, PARSER, alert, assign, badge, debug, echo, excluded_content_parts, freeze, help, info, is_frozen, isa, jr, lets, log, new_datom, rpr, select, thaw, type_of, types, urge, validate, warn, whisper,
		var CND, Cupofjoe, DATOM, MAIN, alert, assign, badge, debug, echo, excluded_content_parts, freeze, help, info, is_frozen, isa, jr, lets, log, new_datom, rpr, select, thaw, type_of, types, urge, validate, warn, whisper,
		indexOf = [].indexOf;
		@@ -59,8 +59,2 @@

		LEXER = require('./chevrotain-html/chevrotain-lexer');

		PARSER = require('./chevrotain-html/chevrotain-parser');

		GRAMMAR = require('./chevrotain-html/chevrotain-grammar');

		//===========================================================================================================
		@@ -420,42 +414,2 @@
		//===========================================================================================================
		// PARSING
		//-----------------------------------------------------------------------------------------------------------
		// Tokenize and parse `html`, then hand the bundled results to `GRAMMAR.extract_tokens()`.
		//
		// `settings` is optional; recognized keys are `lexer_mode` (default 'outside_mode') and
		// `parser_start` (default 'document'). Values given in `settings` override the defaults.
		//
		// Returns whatever `GRAMMAR.extract_tokens()` returns for the description object built below
		// (presumably a list of datoms, going by the method name -- confirm against the grammar module).
		this.datoms_from_html = function(html, settings) {
			// Caller-supplied settings come last so they take precedence over the defaults.
			const { lexer_mode, parser_start } = {
				lexer_mode: 'outside_mode',
				parser_start: 'document',
				...settings
			};
			const tokenization = LEXER.tokenize(html, lexer_mode);
			const parsification = PARSER.parse(tokenization, parser_start);
			// Lexer and parser errors are collected on the description object, not thrown here.
			return GRAMMAR.extract_tokens({
				source: html,
				cst: parsification.cst,
				lexer_mode,
				parser_start,
				errors: {
					lexer: tokenization.errors,
					parser: parsification.errors
				}
			});
		};

		//-----------------------------------------------------------------------------------------------------------
		// Build a stream transform (via the `$` helper exported by `steampipes`) that runs each incoming
		// value through `this.datoms_from_html()` and sends the resulting items downstream one by one.
		// NOTE(review): the parameter name suggests both buffers and texts are accepted -- confirm that
		// the lexer handles buffers.
		this.$datoms_from_html = function() {
			const $ = require('steampipes').export().$;
			// Arrow function keeps `this` bound to the object `$datoms_from_html` was called on.
			return $((buffer_or_text, send) => {
				const datoms = this.datoms_from_html(buffer_or_text);
				const count = datoms.length;
				for (let idx = 0; idx < count; idx++) {
					send(datoms[idx]);
				}
				return null;
			});
		};

		//===========================================================================================================
		// CUP OF HTML
		@@ -462,0 +416,0 @@ //-----------------------------------------------------------------------------------------------------------

lib/main.js

		// Generated by CoffeeScript 2.5.1
		(function() {
		'use strict';
		var CND, FS, Html, Hyph, INTERTEXT, Intertext, MAIN, Mkts, Multimix, PATH, Patterns, Slabs, Tbl, alert, assign, badge, cast, debug, echo, help, info, inspect, isa, jr, log, rpr, type_of, urge, validate, warn, whisper;
		var CND, FS, Html, Hyph, INTERTEXT, Intertext, MAIN, Multimix, PATH, Patterns, Slabs, Tbl, alert, assign, badge, cast, debug, echo, help, info, inspect, isa, jr, log, rpr, type_of, urge, validate, warn, whisper;

		@@ -65,12 +65,2 @@ //###########################################################################################################

		// Mkts: a Multimix subclass with no members of its own; everything exported by `./mkts` is
		// added to it through `Mkts.include()`.
		Mkts = (() => {
			//-----------------------------------------------------------------------------------------------------------
			class Mkts extends Multimix {}

			Mkts.include(require('./mkts'));
			return Mkts;
		})();

		Hyph = (function() {
		@@ -148,3 +138,2 @@ //-----------------------------------------------------------------------------------------------------------
		// Expose one instance of each component class as an upper-case property of `this`.
		this.HTML = new Html();
		this.MKTS = new Mkts();
		this.HYPH = new Hyph();
		@@ -151,0 +140,0 @@ this.SLABS = new Slabs();

lib/tests/tabulate.test.js

		@@ -0,1 +1,2 @@
		// Generated by CoffeeScript 2.5.1
		(function() {
		@@ -739,1 +740,3 @@ 'use strict';
		}).call(this);

		//# sourceMappingURL=tabulate.test.js.map

package.json

		{
		"name": "intertext",
		"version": "1.6.1",
		"version": "2.0.0",
		"description": "Services for Recurrent Text-related Tasks",
		@@ -32,18 +32,16 @@ "main": "lib/main.js",
		"dependencies": {
		"atlas-html-stream": "^1.2.0",
		"chevrotain": "^6.5.0",
		"cnd": "^5.3.1",
		"cnd": "^5.3.2",
		"cupofjoe": "0.0.4",
		"datom": "^4.0.0",
		"hyphenopoly": "^4.2.1",
		"intertype": "^3.4.0",
		"datom": "^5.0.0",
		"hyphenopoly": "^4.3.0",
		"intertype": "^3.4.1",
		"linebreak": "^1.0.2",
		"multimix": "^2.2.0",
		"multimix": "^2.2.1",
		"steampipes": "^6.1.1",
		"term-size": "^2.2.0",
		"to-width": "^1.0.3"
		"to-width": "^1.0.4"
		},
		"devDependencies": {
		"guy-test": "^1.4.2"
		"guy-test": "^2.0.1"
		}
		}

README-html.md

		@@ -11,8 +11,5 @@
		- [General Considerations](#general-considerations)
		- [HTML Parsing](#html-parsing)
		- [HTML Generation](#html-generation)
		- [HTML Generation from Datoms](#html-generation-from-datoms)
		- [HTML Generation from Method Calls](#html-generation-from-method-calls)
		- [Example: HTML Parsing and HTML Generation](#example-html-parsing-and-html-generation)
		- [Remarks](#remarks)

		@@ -27,6 +24,2 @@ <!-- END doctoc generated TOC please keep comment here to allow auto update -->

		# HTML parser
		html_from_datoms
		$html_from_datoms

		# HTML generator
		@@ -53,79 +46,6 @@ datoms_from_html

		* Parsing takes a single text (or a stream of texts) as input and generates a list of (or a stream of)
		[datoms](https://github.com/loveencounterflow/datom) as output.

		* Conversely, HTML generation works by taking a list (or a stream) of
		* HTML generation works by taking a list (or a stream) of
		[datoms](https://github.com/loveencounterflow/datom) as input and generating a single text (or a stream of
		texts) with tags and properly HTML-escaped text content as output.

		* In HTML5 parsing, no errors will be thrown; in principle, any string may be thrown at the parser. However,
		(in the future) there may be system-level datoms with warnings interspersed with the output.

		* [HTML5 empty tags](https://developer.mozilla.org/en-US/docs/Glossary/empty_element) will be honored: when
		parsing HTML, tags like `<br>`, `<img>`, `<hr>` are considered complete without being explicitly closed;
		their self-closing versions `<br/>`, `<img/>`, `<hr/>` will be parsed like the unslashed versions, and
		their closing counterparts `</br>`, `</img>`, `</hr>` will be silently ignored.

		> List of HTML5 empty tags: `area`, `base`, `br`, `col`, `embed`, `hr`, `img`, `input`, `link`, `meta`,
		> `param`, `source`, `track`, `wbr`. This list is fixed for now, but may conceivably become configurable
		> in the future.

		* InterText HTML uses the most generous definition of an XML name, ever. It basically allows anything except
		those few characters that would definitely mess with the rest of the grammar, so tags like `<123>`,
		`<foo:bar#baz.gnu bro:go=42>` are totally OK. Consumers are advised to do their own checking to narrow
		down available choices or interpret special constructs, as the case may be. A valid InterText HTML name is
		any sequence of one or more characters, excluding only
		* whitespace,
		* brackets (`{[(<>)]}`),
		* question and exclamation marks (`!?`),
		* slashes (`/`),
		* equal signs (`=`),
		* and quotes (`'` and `"`).

		* Special constructs are not further analyzed ATM; this includes
		* Doctypes (e.g. `<!DOCTYPE html>`),
		* XML declarations (e.g. `<?xml foo bar?>`),
		* Processing Instructions (e.g. `<?what ever?>`).
		Observe that tag-like constructs that start with `<?` (left pointy bracket, question mark) but end with a
		plain `>` (right pointy bracket) not preceded by a `?` (question mark) are considered ungrammatical (they
		are allowed in SGML, though).

		### HTML Parsing

		HTML parsing uses [`atlassubbed/atlas-html-stream`](https://github.com/atlassubbed/atlas-html-stream) to
		turn HTML5 texts into series of [datoms](https://github.com/loveencounterflow/datom). Two HTML formats are
		supported:

		* plain HTML5, and
		* MKTScript, a nascent crossbreed of a kind-of MarkDown with HTMLish tags.

		Unless you know what you're after you'll probably want to use the plain HTML5 flavor.

		After `{ HTML, } = require 'intertext'`, use one of these methods:

		* `HTML.html_as_datoms = ( text ) ->` to turn HTML fragments or entire documents into a list of datoms, or

		* `HTML.mkts_html_as_datoms = ( text ) ->` to do the same with MKTScript.

		Both methods work pretty much the same and are the inverse operations to `HTML.datom_as_html()`:

		* All opening tags will be turned into datoms whose `$key` is the tagname prefixed with the left pointy
		bracket as sigil, and attribute name/value pairs becoming properties of the datom.
		* Closing tags will be turned into datoms whose `$key` is the tagname prefixed with the right pointy bracket
		as sigil.
		* For plain HTML, 'lone'/'self-closing' tags will be treated like an opening tag immediately followed by a
		closing tag.
		* For MKTScript, 'lone'/'self-closing' tags will be turned into datoms whose `$key` is the tagname prefixed
		with the caret as sigil.
		* Intermittent text will be turned into datoms whose `$key` is `^text` and whose contents are stored under
		the `text` property.
		* Whitespace will be preserved.

		In [SteamPipe](https://github.com/loveencounterflow/steampipes) streams, use the transforms returned by

		* `$html_as_datoms()`
		* `$mkts_html_as_datoms()`

		for the same functionality; both transforms accept texts and buffers as inputs.

		### HTML Generation
		@@ -190,3 +110,3 @@


		<!--
		### Example: HTML Parsing and HTML Generation
		@@ -264,2 +184,2 @@


		-->

README.md


		# InterText
		# 🅘🅝🅣🅔🅡🅣🅔🅧🅣

		@@ -22,6 +22,2 @@ <!-- START doctoc generated TOC please keep comment here to allow auto update -->

		* [InterText HTML](./README-html.md): parse, generate HTML markup.

		* [InterText MKTScript](./README-mkts.md): parse, generate MKTScript markup.

		* [InterText HYPH](./README-hyphenation.md) for hyphenating text in multiple languages (only en-US
		@@ -52,3 +48,4 @@ covered so far, but underlying software is multilingual and configurable).
		* [X] use `INTERTEXT.rpr()` for tabulation instead of `JSON.stringify()`


		* [ ] implement path manipulation, integrate [`pathmap`](https://github.com/jeremyruppel/pathmap)
		* [ ] integrate color-related code from [DataMill
		colorizer](https://github.com/loveencounterflow/datamill/blob/2d0ca3a784c8f3f9ba8d9fd6277d18c4ee859fb1/src/experiments/colorizer.coffee)

lib/chevrotain-html/chevrotain-grammar.js

lib/chevrotain-html/chevrotain-grammar.js.map

lib/chevrotain-html/chevrotain-lexer.js

lib/chevrotain-html/chevrotain-lexer.js.map

lib/chevrotain-html/chevrotain-parser.js

lib/chevrotain-html/chevrotain-parser.js.map

lib/chevrotain-html/demo.js

lib/chevrotain-html/demo.js.map

lib/mkts.js

lib/mkts.js.map

README-mkts.md

src/chevrotain-html/chevrotain-grammar.coffee

src/chevrotain-html/chevrotain-lexer.coffee

src/chevrotain-html/chevrotain-parser.coffee

src/chevrotain-html/demo.coffee

src/mkts.coffee

lib/html.js.map