Comparing version 3.1.1 to 3.1.2
{ | ||
"name": "jschardet", | ||
"version": "3.1.1", | ||
"version": "3.1.2", | ||
"description": "Character encoding auto-detection in JavaScript (port of python's chardet)", | ||
@@ -5,0 +5,0 @@ "author": "António Afonso", |
@@ -66,7 +66,9 @@ /* | ||
// Input: aBuf is a string containing all different types of characters | ||
// Output: a string that contains all alphabetic letters, high-byte characters, and word immediately preceding `>`, but nothing else within `<>` | ||
// Ex: input - '¡£º <div blah blah> abcdef</div> apples! * and oranges 9jd93jd>' | ||
// output - '¡£º blah div apples and oranges jd jd ' | ||
this.filterWithEnglishLetters = function(aBuf) { | ||
// Returns a copy of aBuf that retains only the sequences of English | ||
// alphabet and high byte characters that are not between <> characters. | ||
// The exceptions are PHP tags, which start with '<?' and end with '?>'. | ||
// This filter can be applied to all scripts which contain both English | ||
// characters and extended ASCII characters, but is currently only used by | ||
// Latin1Prober. | ||
this.removeXmlTags = function(aBuf) { | ||
var result = ''; | ||
@@ -77,20 +79,13 @@ var inTag = false; | ||
for (var curr = 0; curr < aBuf.length; curr++) { | ||
var c = aBuf[curr]; | ||
var c = aBuf[curr]; | ||
if (c == '>') { | ||
inTag = false; | ||
} else if (c == '<') { | ||
inTag = true; | ||
} | ||
var isAlpha = /[a-zA-Z]/.test(c); | ||
var isASCII = /^[\x00-\x7F]*$/.test(c); | ||
if (isASCII && !isAlpha) { | ||
if (curr > prev && !inTag) { | ||
result = result + aBuf.substring(prev, curr) + ' '; | ||
if (c == '>' && aBuf[curr-1] !== '?') { | ||
prev = curr + 1 | ||
inTag = false; | ||
} else if (c == '<' && aBuf[curr+1] !== '?') { | ||
if (curr > prev && !inTag) { | ||
result = result + aBuf.substring(prev, curr) + ' '; | ||
} | ||
inTag = true; | ||
} | ||
prev = curr + 1; | ||
} | ||
} | ||
@@ -97,0 +92,0 @@ |
@@ -119,4 +119,4 @@ /* | ||
this.feed = function(aBuf) { | ||
aBuf = this.filterWithEnglishLetters(aBuf); | ||
this.feed = function (aBuf) { | ||
aBuf = this.removeXmlTags(aBuf); | ||
for( var i = 0; i < aBuf.length; i++ ) { | ||
@@ -139,3 +139,2 @@ var c = aBuf.charCodeAt(i); | ||
var confidence; | ||
var constants; | ||
@@ -151,3 +150,3 @@ if( this.getState() == Constants.notMe ) { | ||
if( total < 0.01 ) { | ||
constants = 0.0; | ||
confidence = 0.0; | ||
} else { | ||
@@ -154,0 +153,0 @@ confidence = (this._mFreqCounter[3] / total) - (this._mFreqCounter[1] * 20 / total); |
@@ -60,3 +60,3 @@ /* | ||
const supportedEncodingsDenormalized = (function() { | ||
denormalizedEncodings = []; | ||
const denormalizedEncodings = []; | ||
for (const encoding of supportedEncodings) { | ||
@@ -75,3 +75,9 @@ denormalizedEncodings.push( | ||
if (typeof options.minimumThreshold !== "number") { | ||
options.minimumThreshold = 0.20; | ||
if (options.detectEncodings) { | ||
// If encodings are narrowed down by the user allow for | ||
// any threshold to be returned. | ||
options.minimumThreshold = 0; | ||
} else { | ||
options.minimumThreshold = 0.20; | ||
} | ||
} | ||
@@ -78,0 +84,0 @@ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1317924
16255