jschardet
Advanced tools
Comparing version 1.5.1 to 1.6.0
{ | ||
"name": "jschardet", | ||
"version": "1.5.1", | ||
"version": "1.6.0", | ||
"description": "Character encoding auto-detection in JavaScript (port of python's chardet)", | ||
@@ -5,0 +5,0 @@ "main": "src/init", |
@@ -62,5 +62,37 @@ /* | ||
// Input: aBuf is a string containing all different types of characters | ||
// Output: a string that contains all alphabetic letters, high-byte characters, and the word immediately preceding each `>`, but nothing else within `<>` | ||
// Ex: input - '¡£º <div blah blah> abcdef</div> apples! * and oranges 9jd93jd>' | ||
// output - '¡£º blah div apples and oranges jd jd ' | ||
/**
 * Strips everything from aBuf except runs of English letters and
 * high-byte (non-ASCII) characters, dropping most text inside `<...>`.
 *
 * The buffer is scanned once. Every ASCII character that is not an
 * English letter (digit, punctuation, whitespace, `<`, `>`, ...) acts as
 * a delimiter. When a delimiter is reached, the non-empty run collected
 * since the previous delimiter is appended to the result followed by a
 * single space, but only if the scanner is not inside a tag at that
 * moment. Because the tag state is updated before that check:
 *   - a run terminated by `>` is kept (the tag has just closed),
 *   - a run terminated by `<` is dropped (the tag has just opened),
 *   - runs terminated by any other delimiter inside a tag are dropped.
 * A trailing run at the end of the buffer is appended without a space,
 * unless a tag is still open.
 *
 * @param {string} aBuf Text to filter.
 * @returns {string} The kept runs, each delimited run followed by one space.
 */
this.filterWithEnglishLetters = function(aBuf) {
    var result = '';
    var inTag = false;
    // Index where the current run of kept characters starts.
    var prev = 0;
    for (var curr = 0; curr < aBuf.length; curr++) {
        var c = aBuf.charAt(curr);
        // Track tag state first so the delimiter check below sees it.
        if (c === '>') {
            inTag = false;
        } else if (c === '<') {
            inTag = true;
        }
        var code = aBuf.charCodeAt(curr);
        // A-Z or a-z.
        var isAlpha = (code >= 0x41 && code <= 0x5A) || (code >= 0x61 && code <= 0x7A);
        var isASCII = code <= 0x7F;
        if (isASCII && !isAlpha) {
            if (curr > prev && !inTag) {
                result += aBuf.substring(prev, curr) + ' ';
            }
            // The delimiter itself is never copied.
            prev = curr + 1;
        }
    }
    if (!inTag) {
        // Keep whatever follows the last delimiter.
        result += aBuf.substring(prev);
    }
    return result;
};
@@ -67,0 +99,0 @@ } |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
1372050
17513