Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

hyphen

Package Overview
Dependencies
Maintainers
1
Versions
31
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hyphen - npm Package Compare versions

Comparing version 1.2.1 to 1.3.0

CHANGELOG

214

hyphen.js

@@ -23,2 +23,3 @@ /** Franklin M. Liang's hyphenation algorithm, implemented in Javascript.

var // settings
SETTING_ASYNC_MODE = false,
SETTING_DEBUG = false,

@@ -99,47 +100,118 @@ SETTING_HYPHEN_CHAR = "\u00AD";

function iterateSourceText(text) {
var nextCharIndex = 0;
function createTextChunkReader(text, hyphenChar) {
function readNextTextChunk() {
var nextTextChunk = "";
var states = { readWord: 1, returnWord: 2, returnChar: 3 };
shouldHyphenate = void 0;
return {
next: function() {
var nextChar,
nextWord = "";
chunkReader: while (nextCharIndex <= text.length) {
var nextChar = text.charAt(nextCharIndex++),
charIsLetter =
!!nextChar && !/\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(nextChar),
charIsAngleOpen = nextChar === "<",
charIsAngleClose = nextChar === ">",
charIsHyphen = nextChar === hyphenChar;
while ((nextChar = text.charAt(nextCharIndex++))) {
var charIsSpaceOrSpecial = /\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(
nextChar
);
do {
if (state === STATE_READ_TAG) {
if (charIsAngleClose) {
state = STATE_RETURN_TAG;
}
break;
}
var state = !charIsSpaceOrSpecial
? states.readWord
: state === states.readWord
? states.returnWord
: states.returnChar;
if (charIsHyphen) {
shouldHyphenate = SHOULD_SKIP;
state = STATE_READ_WORD;
break;
}
switch (state) {
case states.readWord:
nextWord += nextChar;
break;
if (charIsLetter) {
state = STATE_READ_WORD;
break;
}
case states.returnWord:
nextCharIndex--;
return nextWord;
if (state === STATE_READ_WORD) {
state = STATE_RETURN_WORD;
shouldHyphenate =
shouldHyphenate || (nextTextChunk.length > 4 && SHOULD_HYPHENATE);
break;
}
case states.returnChar:
return nextChar;
}
shouldHyphenate = SHOULD_SKIP;
state = STATE_RETURN_CHAR;
} while (0);
if (charIsAngleOpen && state !== STATE_RETURN_WORD) {
shouldHyphenate = SHOULD_SKIP;
state = STATE_READ_TAG;
}
if (nextWord !== "") {
return nextWord;
switch (state) {
case STATE_READ_TAG:
nextTextChunk += nextChar;
break;
case STATE_READ_WORD:
nextTextChunk += nextChar;
break;
case STATE_RETURN_CHAR:
nextTextChunk = nextChar;
break chunkReader;
case STATE_RETURN_TAG:
nextTextChunk += nextChar;
break chunkReader;
case STATE_RETURN_WORD:
nextCharIndex--;
break chunkReader;
}
}
};
return nextTextChunk || void 0;
}
function shouldNextHyphenate() {
return shouldHyphenate === SHOULD_HYPHENATE;
}
var nextCharIndex = 0,
SHOULD_HYPHENATE = 1,
SHOULD_SKIP = 2,
shouldHyphenate,
STATE_READ_TAG = 1,
STATE_READ_WORD = 2,
STATE_RETURN_CHAR = 3,
STATE_RETURN_TAG = 4,
STATE_RETURN_WORD = 5,
state;
return [readNextTextChunk, shouldNextHyphenate];
}
function start(text, patterns, cache, debug, hyphenChar) {
function start(text, patterns, cache, debug, hyphenChar, isAsync) {
function done() {
allTime = new Date() - allTime;
resolveNewText(newText);
if (debug) {
console.log(
"----------------\nHyphenation stats: " +
processedN +
" words processed, " +
hyphenatedN +
" words hyphenated"
);
console.log(`Work time: ${workTime / 1000}`);
console.log(`Wait time: ${(allTime - workTime) / 1000}`);
console.log(`All time: ${allTime / 1000}`);
}
}
var newText = "",
nextWord,
readWord = iterateSourceText(text),
nextTextChunk,
reader = createTextChunkReader(text, hyphenChar),
readNextTextChunk = reader[0],
shouldNextHyphenate = reader[1],
states = { hyphenateWord: 1, concatenate: 2 },

@@ -149,37 +221,54 @@ processedN = 0,

while ((nextWord = readWord.next())) {
var state =
nextWord.length > 4 ? states.hyphenateWord : states.concatenate;
var allTime = new Date(),
workTime = 0;
switch (state) {
case states.hyphenateWord:
if (!cache[nextWord])
cache[nextWord] = hyphenateWord(
nextWord,
patterns,
debug,
hyphenChar
);
var resolveNewText = function() {};
if (nextWord !== cache[nextWord]) hyphenatedN++;
(function nextTick() {
var loopStart = new Date();
nextWord = cache[nextWord];
while (
(!isAsync || new Date() - loopStart < 10) &&
(nextTextChunk = readNextTextChunk())
) {
var state = shouldNextHyphenate()
? states.hyphenateWord
: states.concatenate;
case states.concatenate:
newText += nextWord;
switch (state) {
case states.hyphenateWord:
if (!cache[nextTextChunk])
cache[nextTextChunk] = hyphenateWord(
nextTextChunk,
patterns,
debug,
hyphenChar
);
if (nextTextChunk !== cache[nextTextChunk]) hyphenatedN++;
nextTextChunk = cache[nextTextChunk];
case states.concatenate:
newText += nextTextChunk;
}
processedN++;
}
workTime += new Date() - loopStart;
processedN++;
if (!nextTextChunk) {
done();
} else {
setTimeout(nextTick);
}
})();
if (isAsync) {
return new Promise(function(resolve) {
resolveNewText = resolve;
});
} else {
return newText;
}
if (debug)
console.log(
"----------------\nHyphenation stats: " +
processedN +
" words processed, " +
hyphenatedN +
" words hyphenated"
);
return newText;
}

@@ -261,2 +350,5 @@

SETTING_HYPHEN_CHAR,
asyncMode =
(settings && settings.async !== undefined && settings.async) ||
SETTING_ASYNC_MODE,
cache = {},

@@ -283,5 +375,5 @@ // Preprocess patterns

return function(text) {
return start(text, patterns, cache, debug, hyphenChar);
return start(text, patterns, cache, debug, hyphenChar, asyncMode);
};
};
});
{
"name": "hyphen",
"version": "1.2.1",
"version": "1.3.0",
"description": "Franklin M. Liang's hyphenation algorithm, implemented in Javascript.",

@@ -5,0 +5,0 @@ "main": "hyphen.js",

@@ -1,43 +0,30 @@

[![npm](https://img.shields.io/npm/v/hyphen.svg?maxAge=2592000)](https://www.npmjs.com/package/hyphen)
hy-phen
========
![Franklin M. Liang's hyphenation algorithm](https://ytiurin.github.io/hyphen/01.png)
This is a JavaScript implementation of Franklin M. Liang's hyphenation algorithm. It relies on fast programming techniques to reduce execution time. The accuracy of the hyphenation results depends on the quality of the predefined pattern collections, which are different for every language. This implementation uses pattern collections from the TeX website, http://www.ctan.org/.
hyphen
======
This is a hyphenation library, based on Franklin M. Liang's [hyphenation algorithm](https://tug.org/docs/liang/ "Frank Liang wrote his Stanford Ph.D. thesis on a hyphenation algorithm that is standard in TeX, and has been adapted to numerous languages."). At the core of the algorithm lies a set of hyphenation patterns. They are extracted from hand-hyphenated books and articles. Patterns for this library were taken from [ctan.org](https://ctan.org/ "The Comprehensive TEX Archive Network (CTAN) is the central place for all kinds of material around TEX.") and ported to Javascript.
```javascript
var text = "A certain king had a beautiful garden";
var hyphenate = createHyphenator(hyphenationPatternsEnGb);
var hyphenatedText = hyphenate(text);
var initialText = "A certain king had a beautiful garden"
var hyphenatedText = hyphenate(initialText);
console.log(hyphenatedText);
// A cer-tain king had a beau-ti-ful garden
```
### Demo
<a href="https://jsfiddle.net/ytiurin/ctwwwL0f/" target="_blank">JSFiddle</a>
Check the <a href="https://jsfiddle.net/ytiurin/ctwwwL0f/" target="_blank">demo</a>
### Algorithm description
<a href="https://tug.org/docs/liang/liang-thesis.pdf" target="_blank">PDF Document</a>
### Install
Install
-------
```
npm install hyphen
```
or
```
bower install hyphen
```
### Configuration
There are a couple of settings you can use to configure the hyphenator function:
Configuration
-------------
- To change the default soft hyphen `\u00AD` character to something else, use `hyphenChar: "-"`
```javascript
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {hyphenChar:'-'})
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {hyphenChar: '-'});
```

@@ -47,85 +34,20 @@

```javascript
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {debug:true})
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {debug: true});
```
- An async mode is available. When it is enabled, the script works for only 10 ms on each event loop iteration until processing is finished. This prevents the script from blocking the UI when processing large texts.
```javascript
var hyphenate = createHyphenator(hyphenationPatternsEnGb, {async: true});
hyphenate(text).then(function(hyphenatedText) {
console.log(hyphenatedText)
});
```
### Supported languages
- Afrikaans
- Ancient Greek
- Armenian
- Assamese
- Bahasa Indonesia
- Basque
- Bengali
- Bulgarian
- Catalan
- Chinese pinyin syllables
- Church Slavonic
- Classical Latin
- Coptic
- Croatian
- Czech
- Danish
- Dutch
- English (GB)
- English (US)
- Estonian
- Ethiopic
- Finnish
- French
- Friulan
- Galician
- Georgian
- German
- Gujarati
- Hindi
- Hungarian
- Icelandic Plain
- Interlingua
- Irish
- Italian
- Kannada
- Kurmanji
- Latin
- Latvian
- Lithuanian
- Liturgical Latin
- Malayalam
- Marathi
- Modern Monotonic
- Modern Polytonic
- Mongolian
- Norwegian
- Norwegian Bokmal
- Norwegian Nynorsk
- Occitan
- Oriya
- Pāli
- Panjabi
- Piedmontese
- Polish
- Portuguese
- Romanian
- Romansh
- Russian
- Sanskrit and Prakrit
- Serbian Cyrillic
- Serbocroatian Cyrillic
- Serbocroatian Latin
- Slovak
- Slovenian
- Spanish
- Swedish
- Swiss-German
- Tamil
- Telugu
- Thai
- Turkish
- Turkmen
- Ukrainian
- Upper Sorbian
- Welsh
Available patterns for these languages
--------------------------------
Afrikaans, Ancient Greek, Armenian, Assamese, Bahasa Indonesia, Basque, Bengali, Bulgarian, Catalan, Chinese pinyin syllables, Church Slavonic, Classical Latin, Coptic, Croatian, Czech, Danish, Dutch, English (GB), English (US), Estonian, Ethiopic, Finnish, French, Friulan, Galician, Georgian, German, Gujarati, Hindi, Hungarian, Icelandic Plain, Interlingua, Irish, Italian, Kannada, Kurmanji, Latin, Latvian, Lithuanian, Liturgical Latin, Malayalam, Marathi, Modern Monotonic, Modern Polytonic, Mongolian, Norwegian, Norwegian Bokmal, Norwegian Nynorsk, Occitan, Oriya, Pāli, Panjabi, Piedmontese, Polish, Portuguese, Romanian, Romansh, Russian, Sanskrit and Prakrit, Serbian Cyrillic, Serbocroatian Cyrillic, Serbocroatian Latin, Slovak, Slovenian, Spanish, Swedish, Swiss-German, Tamil, Telugu, Thai, Turkish, Turkmen, Ukrainian, Upper Sorbian, Welsh
### Hyphenation in CSS
Hyphenation in CSS
------------------
The CSS `hyphens` property is intended to add hyphenation support to modern browsers without Javascript:

@@ -139,9 +61,11 @@ ```css

### Alternatives
Check out other well-known Javascript hyphenation algorithm implementations:
Alternatives
------------
Check out other great hyphenation libraries:
- [Hyphenator.js](http://mnater.github.io/Hyphenator/) Javascript that implements client-side hyphenation of HTML-Documents.
- [Hypher](https://github.com/bramstein/hypher) A fast and small JavaScript hyphenation engine.
- [Hyphenator.js](http://mnater.github.io/Hyphenator/) does client-side hyphenation of HTML-Documents.
- [Hypher](https://github.com/bramstein/hypher) A fast and small hyphenation engine.
### License
License
-------
MIT
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc