Socket
Socket
Sign in | Demo | Install

jschardet

Package Overview
Dependencies
Maintainers
1
Versions
25
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

jschardet - npm Package Compare versions

Comparing version 3.0.0 to 3.1.0

.github/workflows/build.yml

8

index.d.ts

@@ -5,4 +5,10 @@ export interface IDetectedMap {

}
export function detect(buffer: Buffer | string, options?: { minimumThreshold: number }): IDetectedMap;
/**
 * Options bag accepted by `detect` and `detectAll`.
 *
 * Every member is optional; callers may also omit the object entirely.
 */
export interface IOptionsMap {
  /**
   * Numeric threshold forwarded to the detector.
   * NOTE(review): presumably the minimum confidence a guess must reach to be
   * reported; the exact semantics are not visible in this declaration file.
   */
  minimumThreshold?: number;
  /**
   * Names of the encodings detection should be restricted to,
   * e.g. `["UTF-8", "windows-1252"]`.
   */
  detectEncodings?: string[];
}
export function detect(buffer: Buffer | string, options?: IOptionsMap): IDetectedMap;
export function detectAll(buffer: Buffer | string, options?: IOptionsMap): IDetectedMap[];
export function enableDebug(): void;

14

package.json
{
"name": "jschardet",
"version": "3.0.0",
"version": "3.1.0",
"description": "Character encoding auto-detection in JavaScript (port of python's chardet)",

@@ -20,10 +20,12 @@ "author": "António Afonso",

},
"dependencies": {},
"devDependencies": {
"browserify": "~12.0.1",
"google-closure-compiler": "20151015.0.0"
"browserify": "~17.0.0",
"google-closure-compiler": "20151015.0.0",
"jest": "^29.7.0"
},
"scripts": {
"dist-dev": "mkdir -p dist && browserify index.js -s jschardet --detect-globals false -o dist/jschardet.js",
"dist": "npm run dist-dev && java -jar node_modules/google-closure-compiler/compiler.jar --warning_level QUIET --compilation_level SIMPLE_OPTIMIZATIONS --js dist/jschardet.js > dist/jschardet.min.js"
"dist-dev": "mkdir -p dist && browserify index.js -s jschardet --detect-globals false -o dist/jschardet.js && ./scripts/show-size-changes.sh dist/jschardet.js",
"dist": "npm run dist-dev && java -jar node_modules/google-closure-compiler/compiler.jar --warning_level QUIET --compilation_level SIMPLE_OPTIMIZATIONS --language_in=ECMASCRIPT6_STRICT --language_out=ES5 --js dist/jschardet.js > dist/jschardet.min.js && ./scripts/show-size-changes.sh dist/jschardet.min.js",
"dist-size-changes": "./scripts/show-size-changes.sh dist/*",
"test": "jest"
},

@@ -30,0 +32,0 @@ "engines": {

@@ -17,3 +17,3 @@ [![NPM](https://nodei.co/npm/jschardet.png?downloads=true&downloadRank=true)](https://nodei.co/npm/jschardet/)

### Node
```
```
npm install jschardet

@@ -58,2 +58,6 @@ ```

jschardet.detect(str, { minimumThreshold: 0 });
// Lock down which encodings to detect. This can be useful in situations where
// jschardet is giving a higher probability to encodings that you never use.
jschardet.detect(str, { detectEncodings: ["UTF-8", "windows-1252"] });
```

@@ -60,0 +64,0 @@

@@ -66,2 +66,6 @@ /*

this.getSupportedCharsetNames = function() {
throw new Error("Unimplemented method getSupportedCharsetNames()");
}
this.feed = function(aBuf) {

@@ -68,0 +72,0 @@ for( var i = 0, prober; prober = this._mProbers[i]; i++ ) {

@@ -41,2 +41,6 @@ /*

this.getSupportedCharsetNames = function() {
throw new Error("Unimplemented method getSupportedCharsetNames()");
}
this.feed = function(aBuf) {

@@ -43,0 +47,0 @@ }

@@ -47,2 +47,6 @@ /*

];
self._supportedCharsetNames = [];
for (const codingSM of self._mCodingSM) {
self._supportedCharsetNames.push(codingSM.getCodingStateMachine());
}
self.reset();

@@ -66,2 +70,6 @@ }

this.getSupportedCharsetNames = function() {
return self._supportedCharsetNames;
}
this.getConfidence = function() {

@@ -68,0 +76,0 @@ if( this._mDetectedCharset ) {

@@ -74,4 +74,5 @@ /*

this._mLastChar[1] = aBuf[0];
this._mContextAnalyzer.feed(this._mLastChar, charLen);
this._mDistributionAnalyzer.feed(this._mLastChar, charLen);
var lastCharStr = this._mLastChar.join('');
this._mContextAnalyzer.feed(lastCharStr, charLen);
this._mDistributionAnalyzer.feed(lastCharStr, charLen);
} else {

@@ -78,0 +79,0 @@ this._mContextAnalyzer.feed(aBuf.slice(i-1,i+1), charLen);

@@ -115,2 +115,6 @@ /*

this.getSupportedCharsetNames = function() {
return [this.getCharsetName()];
}
this.feed = function(aBuf) {

@@ -117,0 +121,0 @@ aBuf = this.filterWithEnglishLetters(aBuf);

@@ -42,4 +42,3 @@ /*

self._mCodingSM = null;
//self._mLastChar = ["\x00", "\x00"];
self._mLastChar = "\x00\x00";
self._mLastChar = ["\x00", "\x00"];
}

@@ -55,4 +54,3 @@

}
//this._mLastChar = ["\x00", "\x00"];
this._mLastChar = "\x00\x00";
this._mLastChar = ["\x00", "\x00"];
}

@@ -78,3 +76,3 @@

this._mLastChar[1] = aBuf[0];
this._mDistributionAnalyzer.feed(this._mLastChar, charLen);
this._mDistributionAnalyzer.feed(this._mLastChar.join(''), charLen);
} else {

@@ -81,0 +79,0 @@ this._mDistributionAnalyzer.feed(aBuf.slice(i-1,i+1), charLen);

@@ -50,2 +50,12 @@ /*

];
// Collect, once at construction time, the charset name reported by each
// wrapped prober. The list must be a real array: callers spread it with
// `[].push.apply(...)`, which silently appends nothing when handed a function.
// It is built inline (not inside a plain function expression) so that `this`
// still refers to the prober instance that owns `_mProbers`.
const supportedCharsetNames = [];
for (const prober of this._mProbers) {
    supportedCharsetNames.push(prober.getCharsetName());
}
/**
 * Returns the charset names of all probers wrapped by this group prober.
 * @returns {Array} the names collected when this prober was constructed.
 */
this.getSupportedCharsetNames = function() {
    return supportedCharsetNames;
}
this.reset();

@@ -52,0 +62,0 @@ }

@@ -67,5 +67,14 @@ /*

self._supportedCharsetNames = [];
for (const prober of self._mProbers) {
self._supportedCharsetNames.push(prober.getCharsetName())
}
self.reset();
}
this.getSupportedCharsetNames = function() {
return self._supportedCharsetNames;
}
init();

@@ -72,0 +81,0 @@ }

@@ -74,4 +74,4 @@ /*

this._mLastChar[1] = aBuf[0];
this._mContextAnalyzer.feed(this._mLastChar.slice(2 - charLen), charLen);
this._mDistributionAnalyzer.feed(this._mLastChar, charLen);
this._mContextAnalyzer.feed(this._mLastChar.slice(2 - charLen).join(''), charLen);
this._mDistributionAnalyzer.feed(this._mLastChar.join(''), charLen);
} else {

@@ -78,0 +78,0 @@ this._mContextAnalyzer.feed(aBuf.slice(i + 1 - charLen, i + 3 - charLen), charLen);

@@ -38,5 +38,34 @@ /*

var Latin1Prober = require('./latin1prober');
var EscCharSetProber = require('./escprober')
var EscCharSetProber = require('./escprober');
var logger = require('./logger');
/**
 * Canonical names of every encoding that may be requested through the
 * `detectEncodings` option. Built once at module load from the encodings the
 * detector reports on its own plus whatever each prober family advertises via
 * `getSupportedCharsetNames()`.
 */
const supportedEncodings = (function() {
    // Encodings the detector identifies directly from a byte-order mark.
    // Each name is listed exactly once so the "Supported encodings" error
    // message and the lookup table derived from this list contain no repeats.
    const BOM_UTF = [
        "UTF-8", "UTF-32LE", "UTF-32BE", "UTF-16LE", "UTF-16BE",
        "X-ISO-10646-UCS-4-3412", "X-ISO-10646-UCS-4-2143"
    ];
    // "ascii" is reported by the detector itself for pure-ASCII input (it is
    // gated by canDetectEncoding("ascii")), so it has to be requestable too.
    // NOTE(review): no visible prober advertises "ascii"; confirm none does,
    // otherwise this only introduces a harmless duplicate.
    const DETECTOR_ONLY = ["ascii"];
    // One throwaway instance per prober family, used only to ask for names.
    const probers = [
        new EscCharSetProber(),
        new MBCSGroupProber(),
        new SBCSGroupProber(),
        new Latin1Prober()
    ];
    const encodings = BOM_UTF.concat(DETECTOR_ONLY);
    for (const prober of probers) {
        [].push.apply(encodings, prober.getSupportedCharsetNames());
    }
    return encodings;
})();
/**
 * Lookup table used to validate user-supplied encoding names. For every entry
 * of `supportedEncodings` it holds two accepted spellings: the lower-cased
 * name and the lower-cased name with all dashes removed
 * (e.g. "utf-8" and "utf8").
 */
const supportedEncodingsDenormalized = (function() {
    // Declared locally: an undeclared assignment would create a global
    // variable, and throws a ReferenceError under strict mode.
    const denormalizedEncodings = [];
    for (const encoding of supportedEncodings) {
        // toLowerCase (not toLocaleLowerCase) keeps the table independent of
        // the host locale and consistent with the toLowerCase() applied to
        // user input during validation.
        const lowerCased = encoding.toLowerCase();
        denormalizedEncodings.push(
            lowerCased,
            lowerCased.replace(/-/g, "")
        );
    }
    return denormalizedEncodings;
})();
function UniversalDetector(options) {

@@ -46,2 +75,10 @@ if (!options) options = {};

if (options.detectEncodings) {
for (const encoding of options.detectEncodings) {
if (!supportedEncodingsDenormalized.includes(encoding.toLowerCase())) {
throw new Error(`Encoding ${encoding} is not supported. Supported encodings: ${supportedEncodings}.`);
}
}
}
var _state = {

@@ -62,2 +99,9 @@ pureAscii : 0,

/**
 * Tells whether `encoding` may be reported, given the caller's
 * `options.detectEncodings` restriction.
 *
 * Names are compared case-insensitively and with dashes ignored, so entries
 * such as "UTF-8", "utf-8" and "utf8" all allow the "UTF-8" encoding. These
 * are the same spellings the option validation accepts.
 *
 * @param encoding charset name as reported by the detector or a prober.
 * @returns true when no restriction is configured or when `encoding` is one
 *          of the requested encodings; false otherwise, including when
 *          `encoding` is empty or null while a restriction is configured.
 */
function canDetectEncoding(encoding) {
    if (!options.detectEncodings) {
        return true;
    }
    // A prober without a current guess has no name to compare; it can never
    // match a restriction, and must not crash the lookup.
    if (!encoding) {
        return false;
    }
    // Normalize both sides; comparing a lower-cased candidate against the
    // caller's raw entries would reject mixed-case input such as "UTF-8".
    function normalize(name) {
        return String(name).toLowerCase().replace(/-/g, "");
    }
    var wanted = normalize(encoding);
    return options.detectEncodings.some(function(allowed) {
        return normalize(allowed) === wanted;
    });
}
this.reset = function() {

@@ -70,3 +114,3 @@ this.result = {"encoding": null, "confidence": 0.0};

this._mInputState = _state.pureAscii;
this._mLastChar = "";
this._mLastChar = [];
this._mBOM = "";

@@ -90,21 +134,21 @@ if( this._mEscCharsetProber ) {

// If the data starts with BOM, we know it is UTF
if( this._mBOM.slice(0,3) == "\xEF\xBB\xBF" ) {
if( this._mBOM.slice(0,3) == "\xEF\xBB\xBF" && canDetectEncoding("UTF-8")) {
// EF BB BF UTF-8 with BOM
this.result = {"encoding": "UTF-8", "confidence": 1.0};
} else if( this._mBOM.slice(0,4) == "\xFF\xFE\x00\x00" ) {
} else if( this._mBOM.slice(0,4) == "\xFF\xFE\x00\x00" && canDetectEncoding("UTF-32LE") ) {
// FF FE 00 00 UTF-32, little-endian BOM
this.result = {"encoding": "UTF-32LE", "confidence": 1.0};
} else if( this._mBOM.slice(0,4) == "\x00\x00\xFE\xFF" ) {
} else if( this._mBOM.slice(0,4) == "\x00\x00\xFE\xFF" && canDetectEncoding("UTF-32BE")) {
// 00 00 FE FF UTF-32, big-endian BOM
this.result = {"encoding": "UTF-32BE", "confidence": 1.0};
} else if( this._mBOM.slice(0,4) == "\xFE\xFF\x00\x00" ) {
} else if( this._mBOM.slice(0,4) == "\xFE\xFF\x00\x00" && canDetectEncoding("X-ISO-10646-UCS-4-3412")) {
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
this.result = {"encoding": "X-ISO-10646-UCS-4-3412", "confidence": 1.0};
} else if( this._mBOM.slice(0,4) == "\x00\x00\xFF\xFE" ) {
} else if( this._mBOM.slice(0,4) == "\x00\x00\xFF\xFE" && canDetectEncoding("X-ISO-10646-UCS-4-2143")) {
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
this.result = {"encoding": "X-ISO-10646-UCS-4-2143", "confidence": 1.0};
} else if( this._mBOM.slice(0,2) == "\xFF\xFE" ) {
} else if( this._mBOM.slice(0,2) == "\xFF\xFE" && canDetectEncoding("UTF-16LE")) {
// FF FE UTF-16, little endian BOM
this.result = {"encoding": "UTF-16LE", "confidence": 1.0};
} else if( this._mBOM.slice(0,2) == "\xFE\xFF" ) {
} else if( this._mBOM.slice(0,2) == "\xFE\xFF" && canDetectEncoding("UTF-16BE")) {
// FE FF UTF-16, big endian BOM

@@ -133,3 +177,3 @@ this.result = {"encoding": "UTF-16BE", "confidence": 1.0};

this._mInputState = _state.highbyte;
} else if( this._escDetector.test(this._mLastChar + aBuf) ) {
} else if( this._escDetector.test(this._mLastChar.join('') + aBuf) ) {
this._mInputState = _state.escAscii;

@@ -139,3 +183,3 @@ }

this._mLastChar = aBuf.slice(-1);
this._mLastChar = aBuf.slice(-1).split('');

@@ -146,3 +190,3 @@ if( this._mInputState == _state.escAscii ) {

}
if( this._mEscCharsetProber.feed(aBuf) == constants.foundIt ) {
if( this._mEscCharsetProber.feed(aBuf) == constants.foundIt && canDetectEncoding(this._mEscCharsetProber.getCharsetName()) ) {
this.result = {

@@ -164,3 +208,3 @@ "encoding": this._mEscCharsetProber.getCharsetName(),

for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
if( prober.feed(aBuf) == constants.foundIt ) {
if( prober.feed(aBuf) == constants.foundIt && canDetectEncoding(prober.getCharsetName()) ) {
this.result = {

@@ -186,3 +230,3 @@ "encoding": prober.getCharsetName(),

if( this._mInputState == _state.pureAscii ) {
if( this._mInputState == _state.pureAscii && canDetectEncoding("ascii") ) {
logger.log("pure ascii")

@@ -196,3 +240,3 @@ this.result = {"encoding": "ascii", "confidence": 1.0};

for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
if( !prober || !prober.getCharsetName()) continue;
if( !prober || !prober.getCharsetName() || !canDetectEncoding(prober.getCharsetName()) ) continue;
this.results.push({

@@ -219,3 +263,3 @@ "encoding": prober.getCharsetName(),

for( var i = 0, prober; prober = this._mCharsetProbers[i]; i++ ) {
if( !prober ) continue;
if( !prober || !canDetectEncoding(prober.getCharsetName()) ) continue;
logger.log(prober.getCharsetName() + " confidence = " +

@@ -222,0 +266,0 @@ prober.getConfidence() + "\n");

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc