ep_readability
Advanced tools
Comparing version 0.0.17 to 0.0.18
{ | ||
"name": "ep_readability", | ||
"version": "0.0.17", | ||
"version": "0.0.18", | ||
"description": "Calculates the Flesch readability index in Etherpad, client based.", | ||
@@ -5,0 +5,0 @@ "author": { |
@@ -17,3 +17,3 @@ # Click button to calculate the Flesch readability index | ||
Links are counted as a single syllable (http://aaa-bbb.bla?foo=bar = one syllable). | ||
Links are split into subparts (http://aaa-bbb.bla?foo=bar = http aaa bbb bla foo bar). | ||
@@ -59,8 +59,10 @@ ## Readability scale | ||
## Links and numbers | ||
## Numbers | ||
Rethink the current solution | ||
Rethink the current solution for counting numbers as one syllable. | ||
### Regex | ||
Rethink the handling of time notations, e.g., 20:30. | ||
## Regex | ||
The internal work is based on a regex orgy, which could be rewritten, maybe using the DOM for additional capabilities like highlighting long words and sentences. | ||
@@ -67,0 +69,0 @@ |
@@ -20,3 +20,5 @@ // ---------------------------------------------------------------------------- | ||
text = text.replace(/( )+/gm,' ') | ||
.replace(/(https?|ftp|file):[\w\.\/\?&=\-\_\:]+/gm, 'link') | ||
.replace(/((https?|ftp|file):[\w\.\/\?&=\-\_\:]+)/gm, function(match) { | ||
return match.split(/[^a-zA-Z0-9äüöÄÜÖß]/).join(' '); | ||
}) | ||
.replace(/<(?!div|\/div)[^>]*>/gm,'').replace(/<\/(?!div)>/gm,'') | ||
@@ -29,3 +31,2 @@ .replace(/<div[^>]*>([^<]+[^:, !?.-])\s*<\/div>/gm, "$1."); | ||
.replace(/<[^>]+>/gm, '') // Strip tags (should be DIVs only at this point) | ||
.replace(/\d+([\.,]?\d+)*/g, 'two') // Convert numbers with delimiters to one syllable @TODO lookahead for space: EUR 1000. bla | ||
@@ -32,0 +33,0 @@ .replace(/[!?]/gm, '.') // Unify terminators |
@@ -69,9 +69,9 @@ var readab = require('../static/js/readability_button.js'); | ||
var text = '<div id="magicdomid8" class=""><br></div><div id="magicdomid9" class=""><span class="author-a-z65zjckdup0xdseeity url"><a href="http://jaja.nono-nana.bla?nix=doh">http://jaja.nono-nana.bla?nix=doh</a></span></div>'; | ||
var result = 'link.'; | ||
var result = 'http jaja nono nana bla nix doh.'; | ||
test.equal(result, readab.cleanText(text)); | ||
var text = '<div id="magicdomid27" class=""><span class="author-a-z65zjckdup0xdseeity">Vortrag (</span><span class="author-a-z65zjckdup0xdseeity url"><a href="http://wiki.piratenpartei.de/Datei:Aaa_Bbb.jpg)">http://wiki.piratenpartei.de/Datei:Aaa_Bbb.jpg)</a></span></div>'; | ||
var result = 'Vortrag link.'; | ||
var result = 'Vortrag http wiki piratenpartei de Datei Aaa Bbb jpg.'; | ||
test.equal(result, readab.cleanText(text)); | ||
test.done(); | ||
} |
28099
395
76