Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign inDemoInstall
Socket

codepage

Package Overview
Dependencies
Maintainers
1
Versions
31
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

codepage - npm Package Compare versions

Comparing version 1.4.0 to 1.5.0

102

cputils.js
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* vim: set ft=javascript: */
/*jshint newcap: false */
(function(root, factory){
(function(root, factory) {
"use strict";

@@ -30,7 +31,7 @@ if(typeof cptable === "undefined") {

var magic_encode = {};
var cpdcache = {};
var cpecache = {};
var cpdcache = {};
var sfcc = function sfcc(x) { return String.fromCharCode(x); };
var cca = function cca(x){ return x.charCodeAt(0); };
var cca = function cca(x) { return x.charCodeAt(0); };

@@ -54,8 +55,8 @@ var has_buf = (typeof Buffer !== 'undefined');

var len = data.length;
var out, i, j, D, w;
var out, i=0, j=0, D=0, w=0;
if(typeof data === 'string') {
out = Buffer(len);
out = new Buffer(len);
for(i = 0; i < len; ++i) out[i] = EE[data.charCodeAt(i)];
} else if(Buffer.isBuffer(data)) {
out = Buffer(2*len);
out = new Buffer(2*len);
j = 0;

@@ -75,6 +76,6 @@ for(i = 0; i < len; ++i) {

} else {
out = Buffer(len);
out = new Buffer(len);
for(i = 0; i < len; ++i) out[i] = EE[data[i].charCodeAt(0)];
}
if(ofmt === undefined || ofmt === 'buf') return out;
if(!ofmt || ofmt === 'buf') return out;
if(ofmt !== 'arr') return out.toString('binary');

@@ -86,3 +87,3 @@ return [].slice.call(out);

var D = cpt[cp].dec;
var DD = new Buffer(131072), d=0, c;
var DD = new Buffer(131072), d=0, c="";
for(d=0;d<D.length;++d) {

@@ -94,3 +95,3 @@ if(!(c=D[d])) continue;

return function sbcs_d(data) {
var len = data.length, i=0, j;
var len = data.length, i=0, j=0;
if(2 * len > mdl) { mdl = 2 * len; mdb = new Buffer(mdl); }

@@ -127,3 +128,3 @@ if(Buffer.isBuffer(data)) {

return function dbcs_e(data, ofmt) {
var len = data.length, out = new Buffer(2*len), i, j, jj, k, D;
var len = data.length, out = new Buffer(2*len), i=0, j=0, jj=0, k=0, D=0;
if(typeof data === 'string') {

@@ -155,3 +156,3 @@ for(i = k = 0; i < len; ++i) {

}
if(ofmt === undefined || ofmt === 'buf') return out;
if(!ofmt || ofmt === 'buf') return out;
if(ofmt !== 'arr') return out.toString('binary');

@@ -172,3 +173,3 @@ return [].slice.call(out);

return function dbcs_d(data) {
var len = data.length, out = new Buffer(2*len), i, j, k=0;
var len = data.length, out = new Buffer(2*len), i=0, j=0, k=0;
if(Buffer.isBuffer(data)) {

@@ -197,2 +198,3 @@ for(i = 0; i < len; i++) {

magic_decode[65001] = function utf8_d(data) {
if(typeof data === "string") return utf8_d(data.split("").map(cca));
var len = data.length, w = 0, ww = 0;

@@ -217,2 +219,7 @@ if(4 * len > mdl) { mdl = 4 * len; mdb = new Buffer(mdl); }

magic_encode[65001] = function utf8_e(data, ofmt) {
if(has_buf && Buffer.isBuffer(data)) {
if(!ofmt || ofmt === 'buf') return data;
if(ofmt !== 'arr') return data.toString('binary');
return [].slice.call(data);
}
var len = data.length, w = 0, ww = 0, j = 0;

@@ -240,3 +247,3 @@ var direct = typeof data === "string";

}
if(ofmt === undefined || ofmt === 'buf') return mdb.slice(0,j);
if(!ofmt || ofmt === 'buf') return mdb.slice(0,j);
if(ofmt !== 'arr') return mdb.slice(0,j).toString('binary');

@@ -250,3 +257,3 @@ return [].slice.call(mdb, 0, j);

if(cpdcache[sbcs_cache[0]]) return;
var i, s;
var i=0, s=0;
for(i = 0; i < sbcs_cache.length; ++i) {

@@ -273,3 +280,4 @@ s = sbcs_cache[i];

};
var cp_decache = function cp_decache(cp) { cpdcache[cp] = cpecache[cp] = undefined; };
var null_enc = function(data, ofmt) { return ""; };
var cp_decache = function cp_decache(cp) { delete cpdcache[cp]; delete cpecache[cp]; };
var decache = function decache() {

@@ -282,3 +290,3 @@ if(has_buf) {

}
last_enc = last_cp = undefined;
last_enc = null_enc; last_cp = 0;
};

@@ -296,17 +304,16 @@ var cache = {

var SetD = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
var last_enc, last_cp;
var last_enc = null_enc, last_cp = 0;
var encode = function encode(cp, data, ofmt) {
if(cp === last_cp) { return last_enc(data, ofmt); }
if(cpecache[cp] !== undefined) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); }
if(cp === last_cp && last_enc) { return last_enc(data, ofmt); }
if(cpecache[cp]) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); }
if(has_buf && Buffer.isBuffer(data)) data = data.toString('utf8');
var len = data.length;
var out = has_buf ? new Buffer(4*len) : [], w, i, j = 0, c, tt, ww;
var C = cpt[cp], E, M;
var out = has_buf ? new Buffer(4*len) : [], w=0, i=0, j = 0, ww=0;
var C = cpt[cp], E, M = "";
if(C && (E=C.enc)) for(i = 0; i < len; ++i, ++j) {
w = E[data[i]];
out[j] = w&255;
if(w > 255) {
out[j] = w>>8;
out[++j] = w&255;
}
} else out[j] = w&255;
}

@@ -376,3 +383,3 @@ else if((M=magic[cp])) switch(M) {

out[j+1] = w&255; w >>= 8;
out[j] = w&255; w >>= 8;
out[j] = w&255;
j+=4;

@@ -383,6 +390,6 @@ }

for(i = 0; i < len; i++) {
c = data[i];
var c = data[i];
if(c === "+") { out[j++] = 0x2b; out[j++] = 0x2d; continue; }
if(SetD.indexOf(c) > -1) { out[j++] = c.charCodeAt(0); continue; }
tt = encode(1201, c);
var tt = encode(1201, c);
out[j++] = 0x2b;

@@ -399,4 +406,4 @@ out[j++] = BM.charCodeAt(tt[0]>>2);

out = out.slice(0,j);
if(typeof Buffer === 'undefined') return (ofmt == 'str') ? out.map(sfcc).join("") : out;
if(ofmt === undefined || ofmt === 'buf') return out;
if(!has_buf) return (ofmt == 'str') ? (out).map(sfcc).join("") : out;
if(!ofmt || ofmt === 'buf') return out;
if(ofmt !== 'arr') return out.toString('binary');

@@ -407,15 +414,15 @@ return [].slice.call(out);

var F; if((F=cpdcache[cp])) return F(data);
var len = data.length, out = new Array(len), w, i, j = 1, k = 0, ww;
var C = cpt[cp], D, M;
if(typeof data === "string") return decode(cp, data.split("").map(cca));
var len = data.length, out = new Array(len), s="", w=0, i=0, j=1, k=0, ww=0;
var C = cpt[cp], D, M="";
if(C && (D=C.dec)) {
if(typeof data === "string") data = data.split("").map(cca);
for(i = 0; i < len; i+=j) {
j = 2;
w = D[(data[i]<<8)+ data[i+1]];
if(!w) {
s = D[(data[i]<<8)+ data[i+1]];
if(!s) {
j = 1;
w = D[data[i]];
s = D[data[i]];
}
if(!w) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]);
out[k++] = w;
if(!s) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]);
out[k++] = s;
}

@@ -425,3 +432,2 @@ }

case "utf8":
i = 0;
if(len >= 3 && data[0] == 0xEF) if(data[1] == 0xBB && data[2] == 0xBF) i = 3;

@@ -446,7 +452,6 @@ for(; i < len; i+=j) {

case "utf16le":
i = 0;
if(len >= 2 && data[0] == 0xFF) if(data[1] == 0xFE) i = 2;
if(has_buf && Buffer.isBuffer(data)) return data.toString(M);
j = 2;
for(; i < len; i+=j) {
for(; i+1 < len; i+=j) {
out[k++] = String.fromCharCode((data[i+1]<<8) + data[i]);

@@ -456,6 +461,5 @@ }

case "utf16be":
i = 0;
if(len >= 2 && data[0] == 0xFE) if(data[1] == 0xFF) i = 2;
j = 2;
for(; i < len; i+=j) {
for(; i+1 < len; i+=j) {
out[k++] = String.fromCharCode((data[i]<<8) + data[i+1]);

@@ -465,3 +469,2 @@ }

case "utf32le":
i = 0;
if(len >= 4 && data[0] == 0xFF) if(data[1] == 0xFE && data[2] === 0 && data[3] === 0) i = 4;

@@ -480,3 +483,2 @@ j = 4;

case "utf32be":
i = 0;
if(len >= 4 && data[3] == 0xFF) if(data[2] == 0xFE && data[1] === 0 && data[0] === 0) i = 4;

@@ -495,3 +497,2 @@ j = 4;

case "utf7":
i = 0;
if(len >= 4 && data[0] == 0x2B && data[1] == 0x2F && data[2] == 0x76) {

@@ -509,5 +510,5 @@ if(len >= 5 && data[3] == 0x38 && data[4] == 0x2D) i = 5;

var tt = [];
var o64;
var c1, c2, c3;
var e1, e2, e3, e4;
var o64 = "";
var c1=0, c2=0, c3=0;
var e1=0, e2=0, e3=0, e4=0;
for(var l = 1; l < j - dash;) {

@@ -527,5 +528,4 @@ e1 = BM.indexOf(String.fromCharCode(data[i+l++]));

}
if((tt.length & 1) === 1) tt.length--;
o64 = decode(1201, tt);
for(l = 0; l < o64.length; ++l) out[k++] = o64[l];
for(l = 0; l < o64.length; ++l) out[k++] = o64.charAt(l);
}

@@ -538,5 +538,5 @@ break;

};
var hascp = function hascp(cp) { return cpt[cp] || magic[cp]; };
var hascp = function hascp(cp) { return !!(cpt[cp] || magic[cp]); };
cpt.utils = { decode: decode, encode: encode, hascp: hascp, magic: magic, cache:cache };
return cpt;
}));
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* vim: set ft=javascript: */
/*jshint newcap: false */
(function(root, factory){
(function(root, factory) {
"use strict";

@@ -30,7 +31,7 @@ if(typeof cptable === "undefined") {

var magic_encode = {};
var cpdcache = {};
var cpecache = {};
var cpdcache = {};
var sfcc = function sfcc(x) { return String.fromCharCode(x); };
var cca = function cca(x){ return x.charCodeAt(0); };
var cca = function cca(x) { return x.charCodeAt(0); };

@@ -54,8 +55,8 @@ var has_buf = (typeof Buffer !== 'undefined');

var len = data.length;
var out, i, j, D, w;
var out, i=0, j=0, D=0, w=0;
if(typeof data === 'string') {
out = Buffer(len);
out = new Buffer(len);
for(i = 0; i < len; ++i) out[i] = EE[data.charCodeAt(i)];
} else if(Buffer.isBuffer(data)) {
out = Buffer(2*len);
out = new Buffer(2*len);
j = 0;

@@ -75,6 +76,6 @@ for(i = 0; i < len; ++i) {

} else {
out = Buffer(len);
out = new Buffer(len);
for(i = 0; i < len; ++i) out[i] = EE[data[i].charCodeAt(0)];
}
if(ofmt === undefined || ofmt === 'buf') return out;
if(!ofmt || ofmt === 'buf') return out;
if(ofmt !== 'arr') return out.toString('binary');

@@ -86,3 +87,3 @@ return [].slice.call(out);

var D = cpt[cp].dec;
var DD = new Buffer(131072), d=0, c;
var DD = new Buffer(131072), d=0, c="";
for(d=0;d<D.length;++d) {

@@ -94,3 +95,3 @@ if(!(c=D[d])) continue;

return function sbcs_d(data) {
var len = data.length, i=0, j;
var len = data.length, i=0, j=0;
if(2 * len > mdl) { mdl = 2 * len; mdb = new Buffer(mdl); }

@@ -127,3 +128,3 @@ if(Buffer.isBuffer(data)) {

return function dbcs_e(data, ofmt) {
var len = data.length, out = new Buffer(2*len), i, j, jj, k, D;
var len = data.length, out = new Buffer(2*len), i=0, j=0, jj=0, k=0, D=0;
if(typeof data === 'string') {

@@ -155,3 +156,3 @@ for(i = k = 0; i < len; ++i) {

}
if(ofmt === undefined || ofmt === 'buf') return out;
if(!ofmt || ofmt === 'buf') return out;
if(ofmt !== 'arr') return out.toString('binary');

@@ -172,3 +173,3 @@ return [].slice.call(out);

return function dbcs_d(data) {
var len = data.length, out = new Buffer(2*len), i, j, k=0;
var len = data.length, out = new Buffer(2*len), i=0, j=0, k=0;
if(Buffer.isBuffer(data)) {

@@ -197,2 +198,3 @@ for(i = 0; i < len; i++) {

magic_decode[65001] = function utf8_d(data) {
if(typeof data === "string") return utf8_d(data.split("").map(cca));
var len = data.length, w = 0, ww = 0;

@@ -217,2 +219,7 @@ if(4 * len > mdl) { mdl = 4 * len; mdb = new Buffer(mdl); }

magic_encode[65001] = function utf8_e(data, ofmt) {
if(has_buf && Buffer.isBuffer(data)) {
if(!ofmt || ofmt === 'buf') return data;
if(ofmt !== 'arr') return data.toString('binary');
return [].slice.call(data);
}
var len = data.length, w = 0, ww = 0, j = 0;

@@ -240,3 +247,3 @@ var direct = typeof data === "string";

}
if(ofmt === undefined || ofmt === 'buf') return mdb.slice(0,j);
if(!ofmt || ofmt === 'buf') return mdb.slice(0,j);
if(ofmt !== 'arr') return mdb.slice(0,j).toString('binary');

@@ -250,3 +257,3 @@ return [].slice.call(mdb, 0, j);

if(cpdcache[sbcs_cache[0]]) return;
var i, s;
var i=0, s=0;
for(i = 0; i < sbcs_cache.length; ++i) {

@@ -273,3 +280,4 @@ s = sbcs_cache[i];

};
var cp_decache = function cp_decache(cp) { cpdcache[cp] = cpecache[cp] = undefined; };
var null_enc = function(data, ofmt) { return ""; };
var cp_decache = function cp_decache(cp) { delete cpdcache[cp]; delete cpecache[cp]; };
var decache = function decache() {

@@ -282,3 +290,3 @@ if(has_buf) {

}
last_enc = last_cp = undefined;
last_enc = null_enc; last_cp = 0;
};

@@ -296,17 +304,16 @@ var cache = {

var SetD = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
var last_enc, last_cp;
var last_enc = null_enc, last_cp = 0;
var encode = function encode(cp, data, ofmt) {
if(cp === last_cp) { return last_enc(data, ofmt); }
if(cpecache[cp] !== undefined) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); }
if(cp === last_cp && last_enc) { return last_enc(data, ofmt); }
if(cpecache[cp]) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); }
if(has_buf && Buffer.isBuffer(data)) data = data.toString('utf8');
var len = data.length;
var out = has_buf ? new Buffer(4*len) : [], w, i, j = 0, c, tt, ww;
var C = cpt[cp], E, M;
var out = has_buf ? new Buffer(4*len) : [], w=0, i=0, j = 0, ww=0;
var C = cpt[cp], E, M = "";
if(C && (E=C.enc)) for(i = 0; i < len; ++i, ++j) {
w = E[data[i]];
out[j] = w&255;
if(w > 255) {
out[j] = w>>8;
out[++j] = w&255;
}
} else out[j] = w&255;
}

@@ -376,3 +383,3 @@ else if((M=magic[cp])) switch(M) {

out[j+1] = w&255; w >>= 8;
out[j] = w&255; w >>= 8;
out[j] = w&255;
j+=4;

@@ -383,6 +390,6 @@ }

for(i = 0; i < len; i++) {
c = data[i];
var c = data[i];
if(c === "+") { out[j++] = 0x2b; out[j++] = 0x2d; continue; }
if(SetD.indexOf(c) > -1) { out[j++] = c.charCodeAt(0); continue; }
tt = encode(1201, c);
var tt = encode(1201, c);
out[j++] = 0x2b;

@@ -399,4 +406,4 @@ out[j++] = BM.charCodeAt(tt[0]>>2);

out = out.slice(0,j);
if(typeof Buffer === 'undefined') return (ofmt == 'str') ? out.map(sfcc).join("") : out;
if(ofmt === undefined || ofmt === 'buf') return out;
if(!has_buf) return (ofmt == 'str') ? (out).map(sfcc).join("") : out;
if(!ofmt || ofmt === 'buf') return out;
if(ofmt !== 'arr') return out.toString('binary');

@@ -407,15 +414,15 @@ return [].slice.call(out);

var F; if((F=cpdcache[cp])) return F(data);
var len = data.length, out = new Array(len), w, i, j = 1, k = 0, ww;
var C = cpt[cp], D, M;
if(typeof data === "string") return decode(cp, data.split("").map(cca));
var len = data.length, out = new Array(len), s="", w=0, i=0, j=1, k=0, ww=0;
var C = cpt[cp], D, M="";
if(C && (D=C.dec)) {
if(typeof data === "string") data = data.split("").map(cca);
for(i = 0; i < len; i+=j) {
j = 2;
w = D[(data[i]<<8)+ data[i+1]];
if(!w) {
s = D[(data[i]<<8)+ data[i+1]];
if(!s) {
j = 1;
w = D[data[i]];
s = D[data[i]];
}
if(!w) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]);
out[k++] = w;
if(!s) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]);
out[k++] = s;
}

@@ -425,3 +432,2 @@ }

case "utf8":
i = 0;
if(len >= 3 && data[0] == 0xEF) if(data[1] == 0xBB && data[2] == 0xBF) i = 3;

@@ -446,7 +452,6 @@ for(; i < len; i+=j) {

case "utf16le":
i = 0;
if(len >= 2 && data[0] == 0xFF) if(data[1] == 0xFE) i = 2;
if(has_buf && Buffer.isBuffer(data)) return data.toString(M);
j = 2;
for(; i < len; i+=j) {
for(; i+1 < len; i+=j) {
out[k++] = String.fromCharCode((data[i+1]<<8) + data[i]);

@@ -456,6 +461,5 @@ }

case "utf16be":
i = 0;
if(len >= 2 && data[0] == 0xFE) if(data[1] == 0xFF) i = 2;
j = 2;
for(; i < len; i+=j) {
for(; i+1 < len; i+=j) {
out[k++] = String.fromCharCode((data[i]<<8) + data[i+1]);

@@ -465,3 +469,2 @@ }

case "utf32le":
i = 0;
if(len >= 4 && data[0] == 0xFF) if(data[1] == 0xFE && data[2] === 0 && data[3] === 0) i = 4;

@@ -480,3 +483,2 @@ j = 4;

case "utf32be":
i = 0;
if(len >= 4 && data[3] == 0xFF) if(data[2] == 0xFE && data[1] === 0 && data[0] === 0) i = 4;

@@ -495,3 +497,2 @@ j = 4;

case "utf7":
i = 0;
if(len >= 4 && data[0] == 0x2B && data[1] == 0x2F && data[2] == 0x76) {

@@ -509,5 +510,5 @@ if(len >= 5 && data[3] == 0x38 && data[4] == 0x2D) i = 5;

var tt = [];
var o64;
var c1, c2, c3;
var e1, e2, e3, e4;
var o64 = "";
var c1=0, c2=0, c3=0;
var e1=0, e2=0, e3=0, e4=0;
for(var l = 1; l < j - dash;) {

@@ -527,5 +528,4 @@ e1 = BM.indexOf(String.fromCharCode(data[i+l++]));

}
if((tt.length & 1) === 1) tt.length--;
o64 = decode(1201, tt);
for(l = 0; l < o64.length; ++l) out[k++] = o64[l];
for(l = 0; l < o64.length; ++l) out[k++] = o64.charAt(l);
}

@@ -538,5 +538,5 @@ break;

};
var hascp = function hascp(cp) { return cpt[cp] || magic[cp]; };
var hascp = function hascp(cp) { return !!(cpt[cp] || magic[cp]); };
cpt.utils = { decode: decode, encode: encode, hascp: hascp, magic: magic, cache:cache };
return cpt;
}));
{
"name": "codepage",
"version": "1.4.0",
"version": "1.5.0",
"author": "SheetJS",

@@ -36,3 +36,3 @@ "description": "pure-JS library to handle codepages",

"blanket": {
"pattern": "[cptable.js,cputils.js,cpexcel.js]"
"pattern": "[cputils.js]"
}

@@ -39,0 +39,0 @@ },

@@ -50,17 +50,23 @@ # Codepages for JS

The codepages are indexed by number. To get the unicode character for a given
Most codepages are indexed by number. To get the unicode character for a given
codepoint, use the `dec` property:
var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ
```js
var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ
```
To get the codepoint for a given character, use the `enc` property:
var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255
```js
var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255
```
There are a few utilities that deal with strings and buffers:
var 汇总 = cptable.utils.decode(936, [0xbb,0xe3,0xd7,0xdc]);
var buf = cptable.utils.encode(936, 汇总);
var sushi= cptable.utils.decode(65001, [0xf0,0x9f,0x8d,0xa3]); // 🍣
var sbuf = cptable.utils.encode(65001, sushi);
```js
var 汇总 = cptable.utils.decode(936, [0xbb,0xe3,0xd7,0xdc]);
var buf = cptable.utils.encode(936, 汇总);
var sushi= cptable.utils.decode(65001, [0xf0,0x9f,0x8d,0xa3]); // 🍣
var sbuf = cptable.utils.encode(65001, sushi);
```

@@ -71,3 +77,3 @@ `cptable.utils.encode(CP, data, ofmt)` accepts a String or Array of characters

- Default output is a Buffer (or Array) of bytes (integers between 0 and 255).
- If `ofmt == 'str'`, return a String where `o.charCodeAt(i)` is the ith byte
- If `ofmt == 'str'`, return a String where `o.charCodeAt(i)` is the `i`-th byte
- If `ofmt == 'arr'`, return an Array of bytes

@@ -84,3 +90,5 @@

var cptable = require('codepage/dist/cpexcel.full');
```js
var cptable = require('codepage/dist/cpexcel.full');
```

@@ -93,3 +101,5 @@ ## Rolling your own script

bash make.sh path_to_manifest output_file_name JSVAR
```bash
bash make.sh path_to_manifest output_file_name JSVAR
```

@@ -126,167 +136,183 @@ where

Some codepages are easier to implement algorithmically. Since these are
hardcoded in utils, there is no corresponding entry (they are "magic")
hardcoded in `utils`, there is no corresponding entry (they are "magic").
| CP# | Information | Description |
| --: | :----------: | :---------- |
| 37| unicode.org |IBM EBCDIC US-Canada
| 437| unicode.org |OEM United States
| 500| unicode.org |IBM EBCDIC International
| 620| NLS |Mazovia (Polish) MS-DOS
| 708|MakeEncoding.cs|Arabic (ASMO 708)
| 720|MakeEncoding.cs|Arabic (Transparent ASMO); Arabic (DOS)
| 737| unicode.org |OEM Greek (formerly 437G); Greek (DOS)
| 775| unicode.org |OEM Baltic; Baltic (DOS)
| 850| unicode.org |OEM Multilingual Latin 1; Western European (DOS)
| 852| unicode.org |OEM Latin 2; Central European (DOS)
| 855| unicode.org |OEM Cyrillic (primarily Russian)
| 857| unicode.org |OEM Turkish; Turkish (DOS)
| 858|MakeEncoding.cs|OEM Multilingual Latin 1 + Euro symbol
| 860| unicode.org |OEM Portuguese; Portuguese (DOS)
| 861| unicode.org |OEM Icelandic; Icelandic (DOS)
| 862| unicode.org |OEM Hebrew; Hebrew (DOS)
| 863| unicode.org |OEM French Canadian; French Canadian (DOS)
| 864| unicode.org |OEM Arabic; Arabic (864)
| 865| unicode.org |OEM Nordic; Nordic (DOS)
| 866| unicode.org |OEM Russian; Cyrillic (DOS)
| 869| unicode.org |OEM Modern Greek; Greek, Modern (DOS)
| 870|MakeEncoding.cs|IBM EBCDIC Multilingual/ROECE (Latin 2)
| 874| unicode.org |Windows Thai
| 875| unicode.org |IBM EBCDIC Greek Modern
| 895| NLS |Kamenický (Czech) MS-DOS
| 932| unicode.org |Japanese Shift-JIS
| 936| unicode.org |Simplified Chinese GBK
| 949| unicode.org |Korean
| 950| unicode.org |Traditional Chinese Big5
| 1026| unicode.org |IBM EBCDIC Turkish (Latin 5)
| 1047|MakeEncoding.cs|IBM EBCDIC Latin 1/Open System
| 1140|MakeEncoding.cs|IBM EBCDIC US-Canada (037 + Euro symbol)
| 1141|MakeEncoding.cs|IBM EBCDIC Germany (20273 + Euro symbol)
| 1142|MakeEncoding.cs|IBM EBCDIC Denmark-Norway (20277 + Euro symbol)
| 1143|MakeEncoding.cs|IBM EBCDIC Finland-Sweden (20278 + Euro symbol)
| 1144|MakeEncoding.cs|IBM EBCDIC Italy (20280 + Euro symbol)
| 1145|MakeEncoding.cs|IBM EBCDIC Latin America-Spain (20284 + Euro symbol)
| 1146|MakeEncoding.cs|IBM EBCDIC United Kingdom (20285 + Euro symbol)
| 1147|MakeEncoding.cs|IBM EBCDIC France (20297 + Euro symbol)
| 1148|MakeEncoding.cs|IBM EBCDIC International (500 + Euro symbol)
| 1149|MakeEncoding.cs|IBM EBCDIC Icelandic (20871 + Euro symbol)
| 1200| magic |Unicode UTF-16, little endian (BMP of ISO 10646)
| 1201| magic |Unicode UTF-16, big endian
| 1250| unicode.org |Windows Central Europe
| 1251| unicode.org |Windows Cyrillic
| 1252| unicode.org |Windows Latin I
| 1253| unicode.org |Windows Greek
| 1254| unicode.org |Windows Turkish
| 1255| unicode.org |Windows Hebrew
| 1256| unicode.org |Windows Arabic
| 1257| unicode.org |Windows Baltic
| 1258| unicode.org |Windows Vietnam
| 1361|MakeEncoding.cs|Korean (Johab)
|10000| unicode.org |MAC Roman
|10001|MakeEncoding.cs|Japanese (Mac)
|10002|MakeEncoding.cs|MAC Traditional Chinese (Big5)
|10003|MakeEncoding.cs|Korean (Mac)
|10004|MakeEncoding.cs|Arabic (Mac)
|10005|MakeEncoding.cs|Hebrew (Mac)
|10006| unicode.org |Greek (Mac)
|10007| unicode.org |Cyrillic (Mac)
|10008|MakeEncoding.cs|MAC Simplified Chinese (GB 2312)
|10010|MakeEncoding.cs|Romanian (Mac)
|10017|MakeEncoding.cs|Ukrainian (Mac)
|10021|MakeEncoding.cs|Thai (Mac)
|10029| unicode.org |MAC Latin 2 (Central European)
|10079| unicode.org |Icelandic (Mac)
|10081| unicode.org |Turkish (Mac)
|10082|MakeEncoding.cs|Croatian (Mac)
|12000| magic |Unicode UTF-32, little endian byte order
|12001| magic |Unicode UTF-32, big endian byte order
|20000|MakeEncoding.cs|CNS Taiwan (Chinese Traditional)
|20001|MakeEncoding.cs|TCA Taiwan
|20002|MakeEncoding.cs|Eten Taiwan (Chinese Traditional)
|20003|MakeEncoding.cs|IBM5550 Taiwan
|20004|MakeEncoding.cs|TeleText Taiwan
|20005|MakeEncoding.cs|Wang Taiwan
|20105|MakeEncoding.cs|Western European IA5 (IRV International Alphabet 5) 7-bit
|20106|MakeEncoding.cs|IA5 German (7-bit)
|20107|MakeEncoding.cs|IA5 Swedish (7-bit)
|20108|MakeEncoding.cs|IA5 Norwegian (7-bit)
|20127| magic |US-ASCII (7-bit)
|20261|MakeEncoding.cs|T.61
|20269|MakeEncoding.cs|ISO 6937 Non-Spacing Accent
|20273|MakeEncoding.cs|IBM EBCDIC Germany
|20277|MakeEncoding.cs|IBM EBCDIC Denmark-Norway
|20278|MakeEncoding.cs|IBM EBCDIC Finland-Sweden
|20280|MakeEncoding.cs|IBM EBCDIC Italy
|20284|MakeEncoding.cs|IBM EBCDIC Latin America-Spain
|20285|MakeEncoding.cs|IBM EBCDIC United Kingdom
|20290|MakeEncoding.cs|IBM EBCDIC Japanese Katakana Extended
|20297|MakeEncoding.cs|IBM EBCDIC France
|20420|MakeEncoding.cs|IBM EBCDIC Arabic
|20423|MakeEncoding.cs|IBM EBCDIC Greek
|20424|MakeEncoding.cs|IBM EBCDIC Hebrew
|20833|MakeEncoding.cs|IBM EBCDIC Korean Extended
|20838|MakeEncoding.cs|IBM EBCDIC Thai
|20866|MakeEncoding.cs|Russian Cyrillic (KOI8-R)
|20871|MakeEncoding.cs|IBM EBCDIC Icelandic
|20880|MakeEncoding.cs|IBM EBCDIC Cyrillic Russian
|20905|MakeEncoding.cs|IBM EBCDIC Turkish
|20924|MakeEncoding.cs|IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
|20932|MakeEncoding.cs|Japanese (JIS 0208-1990 and 0212-1990)
|20936|MakeEncoding.cs|Simplified Chinese (GB2312-80)
|20949|MakeEncoding.cs|Korean Wansung
|21025|MakeEncoding.cs|IBM EBCDIC Cyrillic Serbian-Bulgarian
|21027| NLS |Extended/Ext Alpha Lowercase
|21866|MakeEncoding.cs|Ukrainian Cyrillic (KOI8-U)
|28591| unicode.org |ISO 8859-1 Latin 1 (Western European)
|28592| unicode.org |ISO 8859-2 Latin 2 (Central European)
|28593| unicode.org |ISO 8859-3 Latin 3
|28594| unicode.org |ISO 8859-4 Baltic
|28595| unicode.org |ISO 8859-5 Cyrillic
|28596| unicode.org |ISO 8859-6 Arabic
|28597| unicode.org |ISO 8859-7 Greek
|28598| unicode.org |ISO 8859-8 Hebrew (ISO-Visual)
|28599| unicode.org |ISO 8859-9 Turkish
|28600| unicode.org |ISO 8859-10 Latin 6
|28601| unicode.org |ISO 8859-11 Latin (Thai)
|28603| unicode.org |ISO 8859-13 Latin 7 (Estonian)
|28604| unicode.org |ISO 8859-14 Latin 8 (Celtic)
|28605| unicode.org |ISO 8859-15 Latin 9
|28606| unicode.org |ISO 8859-16 Latin 10
|29001|MakeEncoding.cs|Europa 3
|38598|MakeEncoding.cs|ISO 8859-8 Hebrew (ISO-Logical)
|50220|MakeEncoding.cs|ISO 2022 JIS Japanese with no halfwidth Katakana
|50221|MakeEncoding.cs|ISO 2022 JIS Japanese with halfwidth Katakana
|50222|MakeEncoding.cs|ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)
|50225|MakeEncoding.cs|ISO 2022 Korean
|50227|MakeEncoding.cs|ISO 2022 Simplified Chinese
|51932|MakeEncoding.cs|EUC Japanese
|51936|MakeEncoding.cs|EUC Simplified Chinese
|51949|MakeEncoding.cs|EUC Korean
|52936|MakeEncoding.cs|HZ-GB2312 Simplified Chinese
|54936|MakeEncoding.cs|GB18030 Simplified Chinese (4 byte)
|57002|MakeEncoding.cs|ISCII Devanagari
|57003|MakeEncoding.cs|ISCII Bengali
|57004|MakeEncoding.cs|ISCII Tamil
|57005|MakeEncoding.cs|ISCII Telugu
|57006|MakeEncoding.cs|ISCII Assamese
|57007|MakeEncoding.cs|ISCII Oriya
|57008|MakeEncoding.cs|ISCII Kannada
|57009|MakeEncoding.cs|ISCII Malayalam
|57010|MakeEncoding.cs|ISCII Gujarati
|57011|MakeEncoding.cs|ISCII Punjabi
|65000| magic |Unicode (UTF-7)
|65001| magic |Unicode (UTF-8)
| CP# | Source | Description |
|--------:|:-----------:|:-----------------------------------------------------|
| ` 37` | unicode.org | IBM EBCDIC US-Canada |
| ` 437` | unicode.org | OEM United States |
| ` 500` | unicode.org | IBM EBCDIC International |
| ` 620` | NLS | Mazovia (Polish) MS-DOS |
| ` 708` | Windows 7 | Arabic (ASMO 708) |
| ` 720` | Windows 7 | Arabic (Transparent ASMO); Arabic (DOS) |
| ` 737` | unicode.org | OEM Greek (formerly 437G); Greek (DOS) |
| ` 775` | unicode.org | OEM Baltic; Baltic (DOS) |
| ` 808` | unicode.org | OEM Russian; Cyrillic + Euro symbol |
| ` 850` | unicode.org | OEM Multilingual Latin 1; Western European (DOS) |
| ` 852` | unicode.org | OEM Latin 2; Central European (DOS) |
| ` 855` | unicode.org | OEM Cyrillic (primarily Russian) |
| ` 857` | unicode.org | OEM Turkish; Turkish (DOS) |
| ` 858` | Windows 7 | OEM Multilingual Latin 1 + Euro symbol |
| ` 860` | unicode.org | OEM Portuguese; Portuguese (DOS) |
| ` 861` | unicode.org | OEM Icelandic; Icelandic (DOS) |
| ` 862` | unicode.org | OEM Hebrew; Hebrew (DOS) |
| ` 863` | unicode.org | OEM French Canadian; French Canadian (DOS) |
| ` 864` | unicode.org | OEM Arabic; Arabic (864) |
| ` 865` | unicode.org | OEM Nordic; Nordic (DOS) |
| ` 866` | unicode.org | OEM Russian; Cyrillic (DOS) |
| ` 869` | unicode.org | OEM Modern Greek; Greek, Modern (DOS) |
| ` 870` | Windows 7 | IBM EBCDIC Multilingual/ROECE (Latin 2) |
| ` 872` | unicode.org | OEM Cyrillic (primarily Russian) + Euro Symbol |
| ` 874` | unicode.org | Windows Thai |
| ` 875` | unicode.org | IBM EBCDIC Greek Modern |
| ` 895` | NLS | Kamenický (Czech) MS-DOS |
| ` 932` | unicode.org | Japanese Shift-JIS |
| ` 936` | unicode.org | Simplified Chinese GBK |
| ` 949` | unicode.org | Korean |
| ` 950` | unicode.org | Traditional Chinese Big5 |
| ` 1010` | IBM | IBM EBCDIC French |
| ` 1026` | unicode.org | IBM EBCDIC Turkish (Latin 5) |
| ` 1047` | Windows 7 | IBM EBCDIC Latin 1/Open System |
| ` 1132` | IBM | IBM EBCDIC Lao (1132 / 1133 / 1341) |
| ` 1140` | Windows 7 | IBM EBCDIC US-Canada (037 + Euro symbol) |
| ` 1141` | Windows 7 | IBM EBCDIC Germany (20273 + Euro symbol) |
| ` 1142` | Windows 7 | IBM EBCDIC Denmark-Norway (20277 + Euro symbol) |
| ` 1143` | Windows 7 | IBM EBCDIC Finland-Sweden (20278 + Euro symbol) |
| ` 1144` | Windows 7 | IBM EBCDIC Italy (20280 + Euro symbol) |
| ` 1145` | Windows 7 | IBM EBCDIC Latin America-Spain (20284 + Euro symbol) |
| ` 1146` | Windows 7 | IBM EBCDIC United Kingdom (20285 + Euro symbol) |
| ` 1147` | Windows 7 | IBM EBCDIC France (20297 + Euro symbol) |
| ` 1148` | Windows 7 | IBM EBCDIC International (500 + Euro symbol) |
| ` 1149` | Windows 7 | IBM EBCDIC Icelandic (20871 + Euro symbol) |
| ` 1200` | magic | Unicode UTF-16, little endian (BMP of ISO 10646) |
| ` 1201` | magic | Unicode UTF-16, big endian |
| ` 1250` | unicode.org | Windows Central Europe |
| ` 1251` | unicode.org | Windows Cyrillic |
| ` 1252` | unicode.org | Windows Latin I |
| ` 1253` | unicode.org | Windows Greek |
| ` 1254` | unicode.org | Windows Turkish |
| ` 1255` | unicode.org | Windows Hebrew |
| ` 1256` | unicode.org | Windows Arabic |
| ` 1257` | unicode.org | Windows Baltic |
| ` 1258` | unicode.org | Windows Vietnam |
| ` 1361` | Windows 7 | Korean (Johab) |
| `10000` | unicode.org | MAC Roman |
| `10001` | Windows 7 | Japanese (Mac) |
| `10002` | Windows 7 | MAC Traditional Chinese (Big5) |
| `10003` | Windows 7 | Korean (Mac) |
| `10004` | Windows 7 | Arabic (Mac) |
| `10005` | Windows 7 | Hebrew (Mac) |
| `10006` | unicode.org | Greek (Mac) |
| `10007` | unicode.org | Cyrillic (Mac) |
| `10008` | Windows 7 | MAC Simplified Chinese (GB 2312) |
| `10010` | Windows 7 | Romanian (Mac) |
| `10017` | Windows 7 | Ukrainian (Mac) |
| `10021` | Windows 7 | Thai (Mac) |
| `10029` | unicode.org | MAC Latin 2 (Central European) |
| `10079` | unicode.org | Icelandic (Mac) |
| `10081` | unicode.org | Turkish (Mac) |
| `10082` | Windows 7 | Croatian (Mac) |
| `12000` | magic | Unicode UTF-32, little endian byte order |
| `12001` | magic | Unicode UTF-32, big endian byte order |
| `20000` | Windows 7 | CNS Taiwan (Chinese Traditional) |
| `20001` | Windows 7 | TCA Taiwan |
| `20002` | Windows 7 | Eten Taiwan (Chinese Traditional) |
| `20003` | Windows 7 | IBM5550 Taiwan |
| `20004` | Windows 7 | TeleText Taiwan |
| `20005` | Windows 7 | Wang Taiwan |
| `20105` | Windows 7 | Western European IA5 (IRV International Alphabet 5) |
| `20106` | Windows 7 | IA5 German (7-bit) |
| `20107` | Windows 7 | IA5 Swedish (7-bit) |
| `20108` | Windows 7 | IA5 Norwegian (7-bit) |
| `20127` | magic | US-ASCII (7-bit) |
| `20261` | Windows 7 | T.61 |
| `20269` | Windows 7 | ISO 6937 Non-Spacing Accent |
| `20273` | Windows 7 | IBM EBCDIC Germany |
| `20277` | Windows 7 | IBM EBCDIC Denmark-Norway |
| `20278` | Windows 7 | IBM EBCDIC Finland-Sweden |
| `20280` | Windows 7 | IBM EBCDIC Italy |
| `20284` | Windows 7 | IBM EBCDIC Latin America-Spain |
| `20285` | Windows 7 | IBM EBCDIC United Kingdom |
| `20290` | Windows 7 | IBM EBCDIC Japanese Katakana Extended |
| `20297` | Windows 7 | IBM EBCDIC France |
| `20420` | Windows 7 | IBM EBCDIC Arabic |
| `20423` | Windows 7 | IBM EBCDIC Greek |
| `20424` | Windows 7 | IBM EBCDIC Hebrew |
| `20833` | Windows 7 | IBM EBCDIC Korean Extended |
| `20838` | Windows 7 | IBM EBCDIC Thai |
| `20866` | Windows 7 | Russian Cyrillic (KOI8-R) |
| `20871` | Windows 7 | IBM EBCDIC Icelandic |
| `20880` | Windows 7 | IBM EBCDIC Cyrillic Russian |
| `20905` | Windows 7 | IBM EBCDIC Turkish |
| `20924` | Windows 7 | IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) |
| `20932` | Windows 7 | Japanese (JIS 0208-1990 and 0212-1990) |
| `20936` | Windows 7 | Simplified Chinese (GB2312-80) |
| `20949` | Windows 7 | Korean Wansung |
| `21025` | Windows 7 | IBM EBCDIC Cyrillic Serbian-Bulgarian |
| `21027` | NLS | Extended/Ext Alpha Lowercase |
| `21866` | Windows 7 | Ukrainian Cyrillic (KOI8-U) |
| `28591` | unicode.org | ISO 8859-1 Latin 1 (Western European) |
| `28592` | unicode.org | ISO 8859-2 Latin 2 (Central European) |
| `28593` | unicode.org | ISO 8859-3 Latin 3 |
| `28594` | unicode.org | ISO 8859-4 Baltic |
| `28595` | unicode.org | ISO 8859-5 Cyrillic |
| `28596` | unicode.org | ISO 8859-6 Arabic |
| `28597` | unicode.org | ISO 8859-7 Greek |
| `28598` | unicode.org | ISO 8859-8 Hebrew (ISO-Visual) |
| `28599` | unicode.org | ISO 8859-9 Turkish |
| `28600` | unicode.org | ISO 8859-10 Latin 6 |
| `28601` | unicode.org | ISO 8859-11 Latin (Thai) |
| `28603` | unicode.org | ISO 8859-13 Latin 7 (Estonian) |
| `28604` | unicode.org | ISO 8859-14 Latin 8 (Celtic) |
| `28605` | unicode.org | ISO 8859-15 Latin 9 |
| `28606` | unicode.org | ISO 8859-16 Latin 10 |
| `29001` | Windows 7 | Europa 3 |
| `38598` | Windows 7 | ISO 8859-8 Hebrew (ISO-Logical) |
| `47451` | unicode.org | Atari ST/TT |
| `50220` | Windows 7 | ISO 2022 JIS Japanese with no halfwidth Katakana |
| `50221` | Windows 7 | ISO 2022 JIS Japanese with halfwidth Katakana |
| `50222` | Windows 7 | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
| `50225` | Windows 7 | ISO 2022 Korean |
| `50227` | Windows 7 | ISO 2022 Simplified Chinese |
| `51932` | Windows 7 | EUC Japanese |
| `51936` | Windows 7 | EUC Simplified Chinese |
| `51949` | Windows 7 | EUC Korean |
| `52936` | Windows 7 | HZ-GB2312 Simplified Chinese |
| `54936` | Windows 7 | GB18030 Simplified Chinese (4 byte) |
| `57002` | Windows 7 | ISCII Devanagari |
| `57003` | Windows 7 | ISCII Bengali |
| `57004` | Windows 7 | ISCII Tamil |
| `57005` | Windows 7 | ISCII Telugu |
| `57006` | Windows 7 | ISCII Assamese |
| `57007` | Windows 7 | ISCII Oriya |
| `57008` | Windows 7 | ISCII Kannada |
| `57009` | Windows 7 | ISCII Malayalam |
| `57010` | Windows 7 | ISCII Gujarati |
| `57011` | Windows 7 | ISCII Punjabi |
| `65000` | magic | Unicode (UTF-7) |
| `65001` | magic | Unicode (UTF-8) |
Note that MakeEncoding.cs deviates from unicode.org for some codepages. In the
case of direct conflicts, unicode.org takes precedence. In cases where the
unicode.org listing does not prescribe a value, the MakeEncoding.cs value is used.
`unicode.org` refers to the Unicode Consortium Public Mappings, a database of
various mappings between Unicode characters and their respective character sets.
The tables are processed by a few scripts in the build process.
NLS refers to the National Language Support files supplied in various versions of
Windows. In older versions of Windows (e.g. Windows 98) these files followed the
pattern `CP_#.NLS`, but newer versions use the pattern `C_#.NLS`.
`IBM` refers to the IBM coded character set database. Even though IBM uses a
different numbering scheme from Windows, the IBM numbers are used when there is
no conflict. The tables are manually generated from the symbol PDFs.
`Windows 7` refers to direct inspection of Windows 7 machines using the .NET
class `System.Text.Encoding`. The enclosed `MakeEncoding.cs` C# program
brute-forces code pages. `MakeEncoding.cs` deviates from unicode.org in some
cases. When the two map a given code to different characters, the unicode.org
value is used. When unicode.org does not prescribe a value, the
`MakeEncoding.cs` value is used.
`NLS` refers to the National Language Support files supplied in various versions
of Windows. In older versions of Windows (e.g. Windows 98) these files followed
the name pattern `CP_#.NLS`, but newer versions use the name pattern `C_#.NLS`.
## Sources
- [Unicode Consortium Public Mappings](http://www.unicode.org/Public/MAPPINGS/)
- [Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx)
- [Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx)
- [Windows Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx)
- [Windows Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx)
- [IBM Coded Character Sets](https://www-01.ibm.com/software/globalization/ccsid/ccsid_registered.html)

@@ -293,0 +319,0 @@ ## Badges

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc