Socket
Socket
Sign inDemoInstall

intertext

Package Overview
Dependencies
Maintainers
1
Versions
45
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

intertext - npm Package Compare versions

Comparing version 0.1.0 to 0.2.0

lib/slabs.js

4

lib/html.js

@@ -148,6 +148,2 @@ (function() {

// slash = if sigil is '<' then '' else '/'
// return "<#{tagname}#{slash}>" if atxt is ''
// return "<#{tagname}#{atxt}#{slash}>"
//===========================================================================================================

@@ -154,0 +150,0 @@ // PARSING

@@ -1,7 +0,100 @@

// Generated by CoffeeScript 2.5.0
(function() {
'use strict';
var CND, alert, badge, cast, debug, echo, help, hyphenator, info, isa, log, rpr, type_of, types, urge, validate, warn, whisper;
//###########################################################################################################
CND = require('cnd');
rpr = CND.rpr;
badge = 'INTERTEXT/HYPHENATION';
log = CND.get_logger('plain', badge);
info = CND.get_logger('info', badge);
whisper = CND.get_logger('whisper', badge);
alert = CND.get_logger('alert', badge);
debug = CND.get_logger('debug', badge);
warn = CND.get_logger('warn', badge);
help = CND.get_logger('help', badge);
urge = CND.get_logger('urge', badge);
echo = CND.echo.bind(CND);
//...........................................................................................................
types = require('./types');
({isa, validate, cast, type_of} = types);
hyphenator = null;
//-----------------------------------------------------------------------------------------------------------
/* thx to https://stackoverflow.com/a/881111/7568091, https://jsperf.com/performance-of-match-vs-split */
this.soft_hyphen_chr = '\u00ad';
this.soft_hyphen_pattern = /\u00ad/g;
/* Count how often the soft hyphen character (`this.soft_hyphen_chr`, U+00AD) occurs in `text`.
Splitting on the character yields one more piece than there are separators, hence the `- 1`. */
this.count_soft_hyphens = function(text) {
  const pieces = text.split(this.soft_hyphen_chr);
  return pieces.length - 1;
};
/* Return a copy of `text` in which every match of `this.soft_hyphen_pattern` (all soft hyphens, U+00AD)
has been replaced by `replacement`, which defaults to a plain hyphen-minus. */
this.reveal_hyphens = function(text, replacement = '-') {
  const shy_matcher = this.soft_hyphen_pattern;
  return text.replace(shy_matcher, replacement);
};
/* Apply the module-level hyphenation function to `text` and return its result. The function is created
lazily through `this.new_hyphenator()` on the first call and cached in `hyphenator` for later calls. */
this.hyphenate = function(text) {
  if (hyphenator == null) {
    hyphenator = this.new_hyphenator();
  }
  return hyphenator(text);
};
//-----------------------------------------------------------------------------------------------------------
/* Build and return a hyphenation function backed by Hyphenopoly.
   * https://github.com/mnater/Hyphenopoly
   * settings are described in https://github.com/mnater/Hyphenopoly > docs > Setup.md
The return value of `config()` is a hyphenation function when `require` contains exactly one element, and
a map from language codes to functions otherwise; only the single-language case is used here, which is
why the result is validated to be a function before it is handed back. */
this.new_hyphenator = function() {
  const H9Y = require('hyphenopoly/hyphenopoly.module');
  const settings = {
    // Options that are currently not set:
    //   hyphen: '\u00ad'
    //   exceptions: { "de": "Algo-rithmus", "global": "Silben-trennung" }
    //   exceptions: { "de": "Algo-rithmus, Algo-rithmus" }
    //   exceptions: { "global": "Silben-trennung" }
    sync: true,
    require: ['en-us'], // [ "de", "en-us"],
    orphanControl: 1, /* allow orphans */
    compound: 'auto', /* all, auto, hyphen; `all` inserts ZWSP after existing hyphen */
    normalize: false, /* if true, transforms text to some kind of Unicode normal form */
    mixedCase: true,
    minWordLength: 4,
    leftmin: 2, /* also available as per-language setting */
    rightmin: 2
  };
  const hyphenate_en_us = H9Y.config(settings);
  // Guard against `config()` returning a map instead of a single function.
  validate.function(hyphenate_en_us);
  return hyphenate_en_us;
};
// switch ( type = type_of _hyphenators )
// when 'function' then hyphenators = new Map(); hyphenators.set 'en-us', _hyphenators
// when 'map' then null
// else throw new Error "^3464^ unknown hyphenators type #{rpr type}"
// return hyphenators.get 'en-us'
/*
collection of words that are not satisfactorily hyphenated
to be added to an exceptions dictionary
process
su-per-cal-ifrag-ilis-tic
*/
}).call(this);
//# sourceMappingURL=hyphenation.js.map

@@ -45,2 +45,9 @@ (function() {

/*
#...........................................................................................................
_format = require 'number-format.js'
format_float = ( x ) -> _format '#,##0.000', x
format_integer = ( x ) -> _format '#,##0.', x
format_as_percentage = ( x ) -> _format '#,##0.00', x * 100
*/
//===========================================================================================================

@@ -53,3 +60,2 @@

class Intertext extends Multimix {
// @include ( require './outliner.mixin' ), { overwrite: false, }
// @extend MAIN, { overwrite: false, }

@@ -61,2 +67,4 @@

this.HTML = require('./html');
this.HYPH = require('./hyphenation');
this.SLABS = require('./slabs');
if (target != null) {

@@ -63,0 +71,0 @@ this.export(target);

@@ -861,2 +861,25 @@ (function() {

[
"<p>here and<br>there</p>",
[
{
"$key": "<p"
},
{
"text": "here and",
"$key": "^text"
},
{
"$key": "<br"
},
{
"text": "there",
"$key": "^text"
},
{
"$key": ">p"
}
],
null
],
[
"<p>here and<br/>there</p>",

@@ -1347,2 +1370,63 @@ [

//-----------------------------------------------------------------------------------------------------------
/* Demo: parse a sample HTML text into datoms with `HTML.html_as_datoms()`, echo each datom (serialized
with `jr`), print a separator line, then echo the datoms turned back into HTML with
`HTML.datom_as_html()`. Calls `done()` when finished; `T` is not used. */
this["demo"] = function(T, done) {
  const text = `<!DOCTYPE html>
<h1><strong>CHAPTER VI.</strong> <name ref=hd553>Humpty Dumpty</h1>
<p id=p227>However, the egg only got larger and larger, and <em>more and more human</em>:<br>
when she had come within a few yards of it, she saw that it had eyes and a nose and mouth; and when she
had come close to it, she saw clearly that it was <name ref=hd556>HUMPTY DUMPTY</name> himself. ‘It can’t
be anybody else!’ she said to herself.<br/>
‘I’m as certain of it, as if his name were written all over his face.’
`;
  const datoms = HTML.html_as_datoms(text);
  // First pass: show the parsed datoms one per line.
  for (const datom of datoms) {
    echo(jr(datom));
  }
  echo('-'.repeat(108));
  // Second pass: regenerate the HTML from the datoms and show it as one string.
  const html_parts = datoms.map((datom) => HTML.datom_as_html(datom));
  echo(html_parts.join(''));
  //.........................................................................................................
  done();
  return null;
};
//-----------------------------------------------------------------------------------------------------------
/* Demo: same round trip as the plain demo, but the sample HTML is handed to `HTML.html_as_datoms()` as
a `Buffer` instead of a string. Echoes each datom, a separator line, and the regenerated HTML, then
calls `done()`; `T` is not used. */
this["demo (buffer)"] = function(T, done) {
  const text = `<!DOCTYPE html>
<h1><strong>CHAPTER VI.</strong> <name ref=hd553>Humpty Dumpty</h1>`;
  const buffer = Buffer.from(text);
  debug('^80009^', buffer);
  const datoms = HTML.html_as_datoms(buffer);
  // First pass: show the parsed datoms one per line.
  for (const datom of datoms) {
    echo(jr(datom));
  }
  echo('-'.repeat(108));
  // Second pass: regenerate the HTML from the datoms and show it as one string.
  const html_parts = datoms.map((datom) => HTML.datom_as_html(datom));
  echo(html_parts.join(''));
  //.........................................................................................................
  done();
  return null;
};
//###########################################################################################################

@@ -1352,4 +1436,6 @@ if (module === require.main) {

// await @_demo()
test(this);
return help('ok');
// test @
help('ok');
// test @[ "demo" ]
return test(this["demo (buffer)"]);
})();

@@ -1356,0 +1442,0 @@ }

{
"name": "intertext",
"version": "0.1.0",
"version": "0.2.0",
"description": "Services for Recurrent Text-related Tasks",

@@ -34,3 +34,5 @@ "main": "lib/main.js",

"guy-test": "^1.4.1",
"hyphenopoly": "^4.0.0",
"intertype": "^3.1.0",
"linebreak": "^1.0.2",
"multimix": "^2.1.1",

@@ -37,0 +39,0 @@ "steampipes": "^3.5.1"

@@ -10,4 +10,6 @@ <!-- START doctoc generated TOC please keep comment here to allow auto update -->

- [Turning Texts into "Slabs"](#turning-texts-into-slabs)
- [HTML Parsing](#html-parsing)
- [HTML Generation](#html-generation)
- [HTML](#html)
- [HTML Parsing](#html-parsing)
- [HTML Generation](#html-generation)
- [Example: HTML Parsing and HTML Generation](#example-html-parsing-and-html-generation)
- [Codepoint Characterization](#codepoint-characterization)

@@ -44,6 +46,14 @@ - [Benchmarks](#benchmarks)

see jzr/benchmarks/src/hyphenation/main.coffee
see jzr/benchmarks/README.md
probably using `mnater/hyphenopoly`
Implemented with [`mnater/hyphenopoly`](https://github.com/mnater/Hyphenopoly).
* `INTERTEXT.HYPH.hyphenate = ( text ) ->`: return the text with soft hyphens (U+00ad) inserted. For
languages other than US English, `INTERTEXT.HYPH.new_hyphenator = ( settings ) ->` may in a future version
be used to obtain a custom hyphenation function.
* `INTERTEXT.HYPH.count_soft_hyphens = ( text ) ->`: Count occurrences of U+00ad in `text`.
* `INTERTEXT.HYPH.reveal_hyphens = ( text, replacement = '-' ) ->`: Replace all soft hyphens with
`replacement`.
### Turning Texts into "Slabs"

@@ -67,16 +77,138 @@

## HTML Parsing
## HTML
see jzr/benchmarks/src/streaming-html-parsers/main.coffee
see jzr/benchmarks/src/streaming-html-parsers/mkts-tagparser.coffee
### HTML Parsing
probably using `atlassubbed/atlas-html-stream`
HTML parsing uses [`atlassubbed/atlas-html-stream`](https://github.com/atlassubbed/atlas-html-stream) to
turn HTML5 texts into series of [datoms](https://github.com/loveencounterflow/datom). Two HTML formats are
supported:
* plain HTML5, and
* MKTScript, a nascent crossbreed of a kind-of MarkDown with HTMLish tags.
## HTML Generation
Unless you know what you're after you'll probably want to use the plain HTML5 flavor.
Successor to `coffeenode-teacup`
After `{ HTML, } = require 'intertext'`, use one of these methods:
Serialization implemented in [Datom](https://github.com/loveencounterflow/datom)
* `HTML.html_as_datoms = ( text ) ->` to turn HTML fragments or entire documents into a list of datoms, or
* `HTML.mkts_html_as_datoms = ( text ) ->` to do the same with MKTScript.
Both methods work pretty much the same and are the inverse operations to `HTML.datom_as_html()`:
* All opening tags will be turned into datoms whose `$key` is the tagname prefixed with the left pointy
bracket as sigil, and attribute name/value pairs becoming properties of the datom.
* Closing tags will be turned into datoms whose `$key` is the tagname prefixed with the right pointy bracket
as sigil.
* For plain HTML, 'lone'/'self-closing' tags will be treated like an opening tag immediately followed by a
closing tag.
* For MKTScript, 'lone'/'self-closing' tags will be turned into datoms whose `$key` is the tagname prefixed
with the caret as sigil.
* Intermittent text will be turned into datoms whose `$key` is `^text` and whose contents are stored under
the `text` property.
* Whitespace will be preserved.
### HTML Generation
<!-- Successor to `coffeenode-teacup`? -->
`{ HTML, } = require 'intertext'`
* `HTML.datom_as_html = ( d ) ->`
* For the tagname:
* `d.$key` will become the tagname
* the tagname must conform to the [XML tagname restrictions](https://www.w3.org/TR/xml)
* For the attributes:
* all facets with value `true` (the boolean, not the text) will be turned into 'lone attributes', such
that `{ $key: '<p', contenteditable: true, }` will result in `<p contenteditable>`
* facet values are subject to HTML5 attribute value escaping rules as detailed in
https://mathiasbynens.be/notes/unquoted-attribute-values
* where permitted, values will be left unquoted ('naked'); where necessary, values will be surrounded
by `'` (single quotes)
* facets with an empty string are not treated specially; per attribute value escaping rules, they will
result in `''` (two single quotes)
* all keys that start with a `$` will be ignored
* if `d.$value` is an object, its facets will be turned into HTML attributes; all other keys are ignored
* Open questions:
* how to treat system-level names (sigils `[`, `~`, `]`)?
* ignore?
* as comments?
* as prefixed/namespaced tags?
* how to treat datom keys that contain hyphens, underscores?
* turn underscores into hyphens?
### Example: HTML Parsing and HTML Generation
```coffee
text = """<!DOCTYPE html>
<h1><strong>CHAPTER VI.</strong> <name ref=hd553>Humpty Dumpty</h1>
<p id=p227>However, the egg only got larger and larger, and <em>more and more human</em>:<br>
when she had come within a few yards of it, she saw that it had eyes and a nose and mouth; and when she
had come close to it, she saw clearly that it was <name ref=hd556>HUMPTY DUMPTY</name> himself. ‘It can’t
be anybody else!’ she said to herself.<br/>
‘I’m as certain of it, as if his name were written all over his face.’
"""
for d in HTML.html_as_datoms text
log JSON.stringify d
log '-'.repeat 108
log ( HTML.datom_as_html d for d in datoms ).join ''
```
... will produce:
```json
{ "$key": "^doctype", "$value": "html", }
{ "$key": "^text", "text": "\n", }
{ "$key": "<h1", }
{ "$key": "<strong", }
{ "$key": "^text", "text": "CHAPTER VI.", }
{ "$key": ">strong", }
{ "$key": "^text", "text": " ", }
{ "$key": "<name", "ref": "hd553", }
{ "$key": "^text", "text": "Humpty Dumpty", }
{ "$key": ">h1", }
{ "$key": "^text", "text": "\n\n", }
{ "$key": "<p", "id": "p227", }
{ "$key": "^text", "text": "However, the egg only got larger and larger, and ", }
{ "$key": "<em", }
{ "$key": "^text", "text": "more and more human", }
{ "$key": ">em", }
{ "$key": "^text", "text": ":", }
{ "$key": "<br", }
{ "$key": "^text", "text": "\n\nwhen she had come within ... she saw clearly that it was ", }
{ "$key": "<name", "ref": "hd556", }
{ "$key": "^text", "text": "HUMPTY DUMPTY", }
{ "$key": ">name", }
{ "$key": "^text", "text": " himself. ‘It can’t\nbe anybody else!’ she said to herself.", }
{ "$key": "<br", }
{ "$key": ">br", }
{ "$key": "^text", "text": "\n\n‘I’m as certain ... all over his face.’\n", }
```
```html
<!DOCTYPE html>
<h1><strong>CHAPTER VI.</strong> <name ref=hd553>Humpty Dumpty</h1>
<p id=p227>However, the egg only got larger and larger, and <em>more and more human</em>:<br>
when she had come within a few yards of it, she saw that it had eyes and a nose and mouth; and when she
had come close to it, she saw clearly that it was <name ref=hd556>HUMPTY DUMPTY</name> himself. ‘It can’t
be anybody else!’ she said to herself.<br></br>
‘I’m as certain of it, as if his name were written all over his face.’
```
As can be seen, no validation will be done, and the parser will happily produce events for unclosed and
unbalanced closing tags. There is a minor issue with the `<br></br>` tag pair which will get resolved in
a future version.
## Codepoint Characterization

@@ -169,3 +301,3 @@

```
hyphenopoly hypher
hyphenopoly hypher
—————————————————————————————————————————————————————————

@@ -183,3 +315,3 @@ thun-der-storm’s thun-der-stor-m’s

```
hyphenopoly hypher
hyphenopoly hypher
—————————————————————————————————————————————————————————

@@ -186,0 +318,0 @@ Düssel-dorf Düs-sel-dorf

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc