jschardet
Advanced tools
Comparing version 3.1.1 to 3.1.2
{ | ||
"name": "jschardet", | ||
"version": "3.1.1", | ||
"version": "3.1.2", | ||
"description": "Character encoding auto-detection in JavaScript (port of python's chardet)", | ||
@@ -5,0 +5,0 @@ "author": "António Afonso", |
@@ -66,7 +66,9 @@ /* | ||
// Input: aBuf is a string containing all different types of characters | ||
// Output: a string that contains all alphabetic letters, high-byte characters, and word immediately preceding `>`, but nothing else within `<>` | ||
// Ex: input - '¡£º <div blah blah> abcdef</div> apples! * and oranges 9jd93jd>' | ||
// output - '¡£º blah div apples and oranges jd jd ' | ||
this.filterWithEnglishLetters = function(aBuf) { | ||
// Returns a copy of aBuf that retains only the sequences of English | ||
// alphabet and high byte characters that are not between <> characters. | ||
// The exceptions are PHP tags, which start with '<?' and end with '?>'. | ||
// This filter can be applied to all scripts which contain both English | ||
// characters and extended ASCII characters, but is currently only used by | ||
// Latin1Prober. | ||
this.removeXmlTags = function(aBuf) { | ||
var result = ''; | ||
@@ -77,20 +79,13 @@ var inTag = false; | ||
for (var curr = 0; curr < aBuf.length; curr++) { | ||
var c = aBuf[curr]; | ||
var c = aBuf[curr]; | ||
if (c == '>') { | ||
inTag = false; | ||
} else if (c == '<') { | ||
inTag = true; | ||
} | ||
var isAlpha = /[a-zA-Z]/.test(c); | ||
var isASCII = /^[\x00-\x7F]*$/.test(c); | ||
if (isASCII && !isAlpha) { | ||
if (curr > prev && !inTag) { | ||
result = result + aBuf.substring(prev, curr) + ' '; | ||
if (c == '>' && aBuf[curr-1] !== '?') { | ||
prev = curr + 1 | ||
inTag = false; | ||
} else if (c == '<' && aBuf[curr+1] !== '?') { | ||
if (curr > prev && !inTag) { | ||
result = result + aBuf.substring(prev, curr) + ' '; | ||
} | ||
inTag = true; | ||
} | ||
prev = curr + 1; | ||
} | ||
} | ||
@@ -97,0 +92,0 @@ |
@@ -119,4 +119,4 @@ /* | ||
this.feed = function(aBuf) { | ||
aBuf = this.filterWithEnglishLetters(aBuf); | ||
this.feed = function (aBuf) { | ||
aBuf = this.removeXmlTags(aBuf); | ||
for( var i = 0; i < aBuf.length; i++ ) { | ||
@@ -139,3 +139,2 @@ var c = aBuf.charCodeAt(i); | ||
var confidence; | ||
var constants; | ||
@@ -151,3 +150,3 @@ if( this.getState() == Constants.notMe ) { | ||
if( total < 0.01 ) { | ||
constants = 0.0; | ||
confidence = 0.0; | ||
} else { | ||
@@ -154,0 +153,0 @@ confidence = (this._mFreqCounter[3] / total) - (this._mFreqCounter[1] * 20 / total); |
@@ -60,3 +60,3 @@ /* | ||
const supportedEncodingsDenormalized = (function() { | ||
denormalizedEncodings = []; | ||
const denormalizedEncodings = []; | ||
for (const encoding of supportedEncodings) { | ||
@@ -75,3 +75,9 @@ denormalizedEncodings.push( | ||
if (typeof options.minimumThreshold !== "number") { | ||
options.minimumThreshold = 0.20; | ||
if (options.detectEncodings) { | ||
// If encodings are narrowed down by the user allow for | ||
// any threshold to be returned. | ||
options.minimumThreshold = 0; | ||
} else { | ||
options.minimumThreshold = 0.20; | ||
} | ||
} | ||
@@ -78,0 +84,0 @@ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Copyleft License
License (Experimental): Copyleft license information was found.
Found 1 instance in 1 package
Mixed license
License (Experimental): Package contains multiple licenses.
Found 1 instance in 1 package
Non-permissive License
License (Experimental): A license not known to be considered permissive was found.
Found 1 instance in 1 package
Copyleft License
License (Experimental): Copyleft license information was found.
Found 1 instance in 1 package
Mixed license
License (Experimental): Package contains multiple licenses.
Found 1 instance in 1 package
Non-permissive License
License (Experimental): A license not known to be considered permissive was found.
Found 1 instance in 1 package
1317924
16255