ep_readability
Advanced tools
Comparing version 0.0.15 to 0.0.16
{ | ||
"name": "ep_readability", | ||
"version": "0.0.15", | ||
"version": "0.0.16", | ||
"description": "Calculates the Flesch readability index in Etherpad, client based.", | ||
@@ -5,0 +5,0 @@ "author": { |
# Click button to calculate the Flesch readability index | ||
There'll be a button which calculates and displays the Flesch readability index. | ||
Calculating is triggered by entering !, ?, RETURN or BACKSPACE. Or by pushing the button. | ||
Calculation of the Flesch index is triggered by entering !, ?, RETURN or BACKSPACE, or by pushing the button. | ||
@@ -10,6 +11,12 @@ The button shows a tooltip, which provides additional information. | ||
## Calculation details | ||
The original Flesch index is calculated for all languages. German umlauts are replaced (ä = a, ß = ss...). | ||
## Readability | ||
Numbers are counted as a single syllable (1000,56 = one syllable). | ||
Links are counted as a single syllable (http://aaa-bbb.bla?foo=bar = one syllable). | ||
## Readability scale | ||
* -1000: Dissertation, red | ||
@@ -16,0 +23,0 @@ * 10: Officialese, red |
@@ -10,17 +10,21 @@ // ---------------------------------------------------------------------------- | ||
// @TODO rethink this regex orgy: the input text is pretty wild sometimes | ||
exports.cleanText = function(text) { | ||
//console.log(text); | ||
// 1. Remove HTML entities @TODO add more entities | ||
// 2. Remove Etherpad spans, headings (style plugin) and formatting | ||
// 3. Get an Etherpad paragraph, add dot if no terminator or comma is at the end | ||
// @TODO replace the divs by a more simple paragraph placeholder delimiter | ||
// @TODO rethink this regex orgy: the input text is pretty wild sometimes | ||
// 2. Check for plain text links | ||
// 3. Remove Etherpad spans, headings (style plugin) and formatting | ||
// 4. Get an Etherpad paragraph, add dot if no terminator or comma is at the end | ||
text = text.replace(/( )+/gm,' ') | ||
.replace(/(https?|ftp):[\w\.\/\?&=-]+/gm, 'link') | ||
.replace(/<(?!div|\/div)[^>]*>/gm,'').replace(/<\/(?!div)>/gm,'') | ||
.replace(/<div[^>]*>([^<]+[^:, !?.-])\s*<\/div>/gm, "$1."); | ||
//console.log(text); | ||
// This is not Etherpad specific: | ||
// @TODO Add URL matcher | ||
text = text | ||
.replace(/<[^>]+>/gm, '') // Strip tags (should be DIVs only at this point) | ||
.replace(/\d+([\.,]?\d+)*/g, 'two') // Convert numbers with delimiters to one syllable @TODO lookahead for space: EUR 1000. bla | ||
@@ -27,0 +31,0 @@ .replace(/[!?]/gm, '.') // Unify terminators |
@@ -65,2 +65,10 @@ var readab = require('../static/js/readability_button.js'); | ||
test.done(); | ||
} | ||
} | ||
exports.cleanTextLinks = function(test) { | ||
var text = '<div id="magicdomid8" class=""><br></div><div id="magicdomid9" class=""><span class="author-a-z65zjckdup0xdseeity url"><a href="http://jaja.nono-nana.bla?nix=doh">http://jaja.nono-nana.bla?nix=doh</a></span></div>'; | ||
var result = 'link.'; | ||
test.equal(result, readab.cleanText(text)); | ||
test.done(); | ||
} | ||
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
26838
13
389
52