Comparing version 1.2.1 to 1.3.0
214
hyphen.js
@@ -23,2 +23,3 @@ /** Franklin M. Liang's hyphenation algorithm, implemented in Javascript. | ||
var // settings | ||
SETTING_ASYNC_MODE = false, | ||
SETTING_DEBUG = false, | ||
@@ -99,47 +100,118 @@ SETTING_HYPHEN_CHAR = "\u00AD"; | ||
function iterateSourceText(text) { | ||
var nextCharIndex = 0; | ||
function createTextChunkReader(text, hyphenChar) { | ||
function readNextTextChunk() { | ||
var nextTextChunk = ""; | ||
var states = { readWord: 1, returnWord: 2, returnChar: 3 }; | ||
shouldHyphenate = void 0; | ||
return { | ||
next: function() { | ||
var nextChar, | ||
nextWord = ""; | ||
chunkReader: while (nextCharIndex <= text.length) { | ||
var nextChar = text.charAt(nextCharIndex++), | ||
charIsLetter = | ||
!!nextChar && !/\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(nextChar), | ||
charIsAngleOpen = nextChar === "<", | ||
charIsAngleClose = nextChar === ">", | ||
charIsHyphen = nextChar === hyphenChar; | ||
while ((nextChar = text.charAt(nextCharIndex++))) { | ||
var charIsSpaceOrSpecial = /\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test( | ||
nextChar | ||
); | ||
do { | ||
if (state === STATE_READ_TAG) { | ||
if (charIsAngleClose) { | ||
state = STATE_RETURN_TAG; | ||
} | ||
break; | ||
} | ||
var state = !charIsSpaceOrSpecial | ||
? states.readWord | ||
: state === states.readWord | ||
? states.returnWord | ||
: states.returnChar; | ||
if (charIsHyphen) { | ||
shouldHyphenate = SHOULD_SKIP; | ||
state = STATE_READ_WORD; | ||
break; | ||
} | ||
switch (state) { | ||
case states.readWord: | ||
nextWord += nextChar; | ||
break; | ||
if (charIsLetter) { | ||
state = STATE_READ_WORD; | ||
break; | ||
} | ||
case states.returnWord: | ||
nextCharIndex--; | ||
return nextWord; | ||
if (state === STATE_READ_WORD) { | ||
state = STATE_RETURN_WORD; | ||
shouldHyphenate = | ||
shouldHyphenate || (nextTextChunk.length > 4 && SHOULD_HYPHENATE); | ||
break; | ||
} | ||
case states.returnChar: | ||
return nextChar; | ||
} | ||
shouldHyphenate = SHOULD_SKIP; | ||
state = STATE_RETURN_CHAR; | ||
} while (0); | ||
if (charIsAngleOpen && state !== STATE_RETURN_WORD) { | ||
shouldHyphenate = SHOULD_SKIP; | ||
state = STATE_READ_TAG; | ||
} | ||
if (nextWord !== "") { | ||
return nextWord; | ||
switch (state) { | ||
case STATE_READ_TAG: | ||
nextTextChunk += nextChar; | ||
break; | ||
case STATE_READ_WORD: | ||
nextTextChunk += nextChar; | ||
break; | ||
case STATE_RETURN_CHAR: | ||
nextTextChunk = nextChar; | ||
break chunkReader; | ||
case STATE_RETURN_TAG: | ||
nextTextChunk += nextChar; | ||
break chunkReader; | ||
case STATE_RETURN_WORD: | ||
nextCharIndex--; | ||
break chunkReader; | ||
} | ||
} | ||
}; | ||
return nextTextChunk || void 0; | ||
} | ||
function shouldNextHyphenate() { | ||
return shouldHyphenate === SHOULD_HYPHENATE; | ||
} | ||
var nextCharIndex = 0, | ||
SHOULD_HYPHENATE = 1, | ||
SHOULD_SKIP = 2, | ||
shouldHyphenate, | ||
STATE_READ_TAG = 1, | ||
STATE_READ_WORD = 2, | ||
STATE_RETURN_CHAR = 3, | ||
STATE_RETURN_TAG = 4, | ||
STATE_RETURN_WORD = 5, | ||
state; | ||
return [readNextTextChunk, shouldNextHyphenate]; | ||
} | ||
function start(text, patterns, cache, debug, hyphenChar) { | ||
function start(text, patterns, cache, debug, hyphenChar, isAsync) { | ||
function done() { | ||
allTime = new Date() - allTime; | ||
resolveNewText(newText); | ||
if (debug) { | ||
console.log( | ||
"----------------\nHyphenation stats: " + | ||
processedN + | ||
" words processed, " + | ||
hyphenatedN + | ||
" words hyphenated" | ||
); | ||
console.log(`Work time: ${workTime / 1000}`); | ||
console.log(`Wait time: ${(allTime - workTime) / 1000}`); | ||
console.log(`All time: ${allTime / 1000}`); | ||
} | ||
} | ||
var newText = "", | ||
nextWord, | ||
readWord = iterateSourceText(text), | ||
nextTextChunk, | ||
reader = createTextChunkReader(text, hyphenChar), | ||
readNextTextChunk = reader[0], | ||
shouldNextHyphenate = reader[1], | ||
states = { hyphenateWord: 1, concatenate: 2 }, | ||
@@ -149,37 +221,54 @@ processedN = 0, | ||
while ((nextWord = readWord.next())) { | ||
var state = | ||
nextWord.length > 4 ? states.hyphenateWord : states.concatenate; | ||
var allTime = new Date(), | ||
workTime = 0; | ||
switch (state) { | ||
case states.hyphenateWord: | ||
if (!cache[nextWord]) | ||
cache[nextWord] = hyphenateWord( | ||
nextWord, | ||
patterns, | ||
debug, | ||
hyphenChar | ||
); | ||
var resolveNewText = function() {}; | ||
if (nextWord !== cache[nextWord]) hyphenatedN++; | ||
(function nextTick() { | ||
var loopStart = new Date(); | ||
nextWord = cache[nextWord]; | ||
while ( | ||
(!isAsync || new Date() - loopStart < 10) && | ||
(nextTextChunk = readNextTextChunk()) | ||
) { | ||
var state = shouldNextHyphenate() | ||
? states.hyphenateWord | ||
: states.concatenate; | ||
case states.concatenate: | ||
newText += nextWord; | ||
switch (state) { | ||
case states.hyphenateWord: | ||
if (!cache[nextTextChunk]) | ||
cache[nextTextChunk] = hyphenateWord( | ||
nextTextChunk, | ||
patterns, | ||
debug, | ||
hyphenChar | ||
); | ||
if (nextTextChunk !== cache[nextTextChunk]) hyphenatedN++; | ||
nextTextChunk = cache[nextTextChunk]; | ||
case states.concatenate: | ||
newText += nextTextChunk; | ||
} | ||
processedN++; | ||
} | ||
workTime += new Date() - loopStart; | ||
processedN++; | ||
if (!nextTextChunk) { | ||
done(); | ||
} else { | ||
setTimeout(nextTick); | ||
} | ||
})(); | ||
if (isAsync) { | ||
return new Promise(function(resolve) { | ||
resolveNewText = resolve; | ||
}); | ||
} else { | ||
return newText; | ||
} | ||
if (debug) | ||
console.log( | ||
"----------------\nHyphenation stats: " + | ||
processedN + | ||
" words processed, " + | ||
hyphenatedN + | ||
" words hyphenated" | ||
); | ||
return newText; | ||
} | ||
@@ -261,2 +350,5 @@ | ||
SETTING_HYPHEN_CHAR, | ||
asyncMode = | ||
(settings && settings.async !== undefined && settings.async) || | ||
SETTING_ASYNC_MODE, | ||
cache = {}, | ||
@@ -283,5 +375,5 @@ // Preprocess patterns | ||
return function(text) { | ||
return start(text, patterns, cache, debug, hyphenChar); | ||
return start(text, patterns, cache, debug, hyphenChar, asyncMode); | ||
}; | ||
}; | ||
}); |
{ | ||
"name": "hyphen", | ||
"version": "1.2.1", | ||
"version": "1.3.0", | ||
"description": "Franklin M. Liang's hyphenation algorithm, implemented in Javascript.", | ||
@@ -5,0 +5,0 @@ "main": "hyphen.js", |
142
README.md
@@ -1,43 +0,30 @@ | ||
[![npm](https://img.shields.io/npm/v/hyphen.svg?maxAge=2592000)](https://www.npmjs.com/package/hyphen) | ||
hy-phen | ||
======== | ||
![Franklin M. Liang's hyphenation algorithm](https://ytiurin.github.io/hyphen/01.png) | ||
This is a JavaScript implementation of Franklin M. Liang's hyphenation algorithm. It relies on fast programming techniques to reduce execution time. The accuracy of hyphenation results depends on the quality of the predefined pattern collections, which are different for every language. This implementation uses pattern collections from the TeX website, http://www.ctan.org/. | ||
hyphen | ||
====== | ||
This is a hyphenation library, based on Franklin M. Liang's [hyphenation algorithm](https://tug.org/docs/liang/ "Frank Liang wrote his Stanford Ph.D. thesis on a hyphenation algorithm that is standard in TeX, and has been adapted to numerous languages."). At the core of the algorithm lies a set of hyphenation patterns. They are extracted from hand-hyphenated books and articles. Patterns for this library were taken from [ctan.org](https://ctan.org/ "The Comprehensive TEX Archive Network (CTAN) is the central place for all kinds of material around TEX.") and ported to Javascript. | ||
```javascript | ||
var text = "A certain king had a beautiful garden"; | ||
var hyphenate = createHyphenator(hyphenationPatternsEnGb); | ||
var hyphenatedText = hyphenate(text); | ||
var initialText = "A certain king had a beautiful garden" | ||
var hyphenatedText = hyphenate(initialText); | ||
console.log(hyphenatedText); | ||
// A cer-tain king had a beau-ti-ful garden | ||
``` | ||
### Demo | ||
<a href="https://jsfiddle.net/ytiurin/ctwwwL0f/" target="_blank">JSFiddle</a> | ||
Check the <a href="https://jsfiddle.net/ytiurin/ctwwwL0f/" target="_blank">demo</a> | ||
### Algorithm description | ||
<a href="https://tug.org/docs/liang/liang-thesis.pdf" target="_blank">PDF Document</a> | ||
### Install | ||
Install | ||
------- | ||
``` | ||
npm install hyphen | ||
``` | ||
or | ||
``` | ||
bower install hyphen | ||
``` | ||
### Configuration | ||
There are a couple of settings you can use to configure the hyphenator function: | ||
Configuration | ||
------------- | ||
- To change the default soft hyphen `\u00AD` character to something else, use `hyphenChar: "-"` | ||
```javascript | ||
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {hyphenChar:'-'}) | ||
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {hyphenChar: '-'}); | ||
``` | ||
@@ -47,85 +34,20 @@ | ||
```javascript | ||
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {debug:true}) | ||
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {debug: true}); | ||
``` | ||
- An async mode is available. When it is enabled, the script works for only 10 ms on each event loop iteration until processing is finished. This prevents the script from blocking the UI when processing large texts. | ||
```javascript | ||
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {async: true}); | ||
hyphenate(text).then(function(hyphenatedText) { | ||
console.log(hyphenatedText) | ||
}); | ||
``` | ||
### Supported languages | ||
- Afrikaans | ||
- Ancient Greek | ||
- Armenian | ||
- Assamese | ||
- Bahasa Indonesia | ||
- Basque | ||
- Bengali | ||
- Bulgarian | ||
- Catalan | ||
- Chinese pinyin syllables | ||
- Church Slavonic | ||
- Classical Latin | ||
- Coptic | ||
- Croatian | ||
- Czech | ||
- Danish | ||
- Dutch | ||
- English (GB) | ||
- English (US) | ||
- Estonian | ||
- Ethiopic | ||
- Finnish | ||
- French | ||
- Friulan | ||
- Galician | ||
- Georgian | ||
- German | ||
- Gujarati | ||
- Hindi | ||
- Hungarian | ||
- Icelandic Plain | ||
- Interlingua | ||
- Irish | ||
- Italian | ||
- Kannada | ||
- Kurmanji | ||
- Latin | ||
- Latvian | ||
- Lithuanian | ||
- Liturgical Latin | ||
- Malayalam | ||
- Marathi | ||
- Modern Monotonic | ||
- Modern Polytonic | ||
- Mongolian | ||
- Norwegian | ||
- Norwegian Bokmal | ||
- Norwegian Nynorsk | ||
- Occitan | ||
- Oriya | ||
- Pāli | ||
- Panjabi | ||
- Piedmontese | ||
- Polish | ||
- Portuguese | ||
- Romanian | ||
- Romansh | ||
- Russian | ||
- Sanskrit and Prakrit | ||
- Serbian Cyrillic | ||
- Serbocroatian Cyrillic | ||
- Serbocroatian Latin | ||
- Slovak | ||
- Slovenian | ||
- Spanish | ||
- Swedish | ||
- Swiss-German | ||
- Tamil | ||
- Telugu | ||
- Thai | ||
- Turkish | ||
- Turkmen | ||
- Ukrainian | ||
- Upper Sorbian | ||
- Welsh | ||
Available patterns for these languages | ||
-------------------------------- | ||
Afrikaans, Ancient Greek, Armenian, Assamese, Bahasa Indonesia, Basque, Bengali, Bulgarian, Catalan, Chinese pinyin syllables, Church Slavonic, Classical Latin, Coptic, Croatian, Czech, Danish, Dutch, English (GB), English (US), Estonian, Ethiopic, Finnish, French, Friulan, Galician, Georgian, German, Gujarati, Hindi, Hungarian, Icelandic Plain, Interlingua, Irish, Italian, Kannada, Kurmanji, Latin, Latvian, Lithuanian, Liturgical Latin, Malayalam, Marathi, Modern Monotonic, Modern Polytonic, Mongolian, Norwegian, Norwegian Bokmal, Norwegian Nynorsk, Occitan, Oriya, Pāli, Panjabi, Piedmontese, Polish, Portuguese, Romanian, Romansh, Russian, Sanskrit and Prakrit, Serbian Cyrillic, Serbocroatian Cyrillic, Serbocroatian Latin, Slovak, Slovenian, Spanish, Swedish, Swiss-German, Tamil, Telugu, Thai, Turkish, Turkmen, Ukrainian, Upper Sorbian, Welsh | ||
### Hyphenation in CSS | ||
Hyphenation in CSS | ||
------------------ | ||
The CSS `hyphens` property is intended to add hyphenation support to modern browsers without Javascript: | ||
@@ -139,9 +61,11 @@ ```css | ||
### Alternatives | ||
Check out other well-known Javascript hyphenation algorithm implementations: | ||
Alternatives | ||
------------ | ||
Check out other great hyphenation libraries: | ||
- [Hyphenator.js](http://mnater.github.io/Hyphenator/) Javascript that implements client-side hyphenation of HTML-Documents. | ||
- [Hypher](https://github.com/bramstein/hypher) A fast and small JavaScript hyphenation engine. | ||
- [Hyphenator.js](http://mnater.github.io/Hyphenator/) does client-side hyphenation of HTML-Documents. | ||
- [Hypher](https://github.com/bramstein/hypher) A fast and small hyphenation engine. | ||
### License | ||
License | ||
------- | ||
MIT |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
3726896
81
305890
70