Comparing version 1.4.0 to 1.5.0
102
cputils.js
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */ | ||
/* vim: set ft=javascript: */ | ||
/*jshint newcap: false */ | ||
(function(root, factory){ | ||
(function(root, factory) { | ||
"use strict"; | ||
@@ -30,7 +31,7 @@ if(typeof cptable === "undefined") { | ||
var magic_encode = {}; | ||
var cpdcache = {}; | ||
var cpecache = {}; | ||
var cpdcache = {}; | ||
var sfcc = function sfcc(x) { return String.fromCharCode(x); }; | ||
var cca = function cca(x){ return x.charCodeAt(0); }; | ||
var cca = function cca(x) { return x.charCodeAt(0); }; | ||
@@ -54,8 +55,8 @@ var has_buf = (typeof Buffer !== 'undefined'); | ||
var len = data.length; | ||
var out, i, j, D, w; | ||
var out, i=0, j=0, D=0, w=0; | ||
if(typeof data === 'string') { | ||
out = Buffer(len); | ||
out = new Buffer(len); | ||
for(i = 0; i < len; ++i) out[i] = EE[data.charCodeAt(i)]; | ||
} else if(Buffer.isBuffer(data)) { | ||
out = Buffer(2*len); | ||
out = new Buffer(2*len); | ||
j = 0; | ||
@@ -75,6 +76,6 @@ for(i = 0; i < len; ++i) { | ||
} else { | ||
out = Buffer(len); | ||
out = new Buffer(len); | ||
for(i = 0; i < len; ++i) out[i] = EE[data[i].charCodeAt(0)]; | ||
} | ||
if(ofmt === undefined || ofmt === 'buf') return out; | ||
if(!ofmt || ofmt === 'buf') return out; | ||
if(ofmt !== 'arr') return out.toString('binary'); | ||
@@ -86,3 +87,3 @@ return [].slice.call(out); | ||
var D = cpt[cp].dec; | ||
var DD = new Buffer(131072), d=0, c; | ||
var DD = new Buffer(131072), d=0, c=""; | ||
for(d=0;d<D.length;++d) { | ||
@@ -94,3 +95,3 @@ if(!(c=D[d])) continue; | ||
return function sbcs_d(data) { | ||
var len = data.length, i=0, j; | ||
var len = data.length, i=0, j=0; | ||
if(2 * len > mdl) { mdl = 2 * len; mdb = new Buffer(mdl); } | ||
@@ -127,3 +128,3 @@ if(Buffer.isBuffer(data)) { | ||
return function dbcs_e(data, ofmt) { | ||
var len = data.length, out = new Buffer(2*len), i, j, jj, k, D; | ||
var len = data.length, out = new Buffer(2*len), i=0, j=0, jj=0, k=0, D=0; | ||
if(typeof data === 'string') { | ||
@@ -155,3 +156,3 @@ for(i = k = 0; i < len; ++i) { | ||
} | ||
if(ofmt === undefined || ofmt === 'buf') return out; | ||
if(!ofmt || ofmt === 'buf') return out; | ||
if(ofmt !== 'arr') return out.toString('binary'); | ||
@@ -172,3 +173,3 @@ return [].slice.call(out); | ||
return function dbcs_d(data) { | ||
var len = data.length, out = new Buffer(2*len), i, j, k=0; | ||
var len = data.length, out = new Buffer(2*len), i=0, j=0, k=0; | ||
if(Buffer.isBuffer(data)) { | ||
@@ -197,2 +198,3 @@ for(i = 0; i < len; i++) { | ||
magic_decode[65001] = function utf8_d(data) { | ||
if(typeof data === "string") return utf8_d(data.split("").map(cca)); | ||
var len = data.length, w = 0, ww = 0; | ||
@@ -217,2 +219,7 @@ if(4 * len > mdl) { mdl = 4 * len; mdb = new Buffer(mdl); } | ||
magic_encode[65001] = function utf8_e(data, ofmt) { | ||
if(has_buf && Buffer.isBuffer(data)) { | ||
if(!ofmt || ofmt === 'buf') return data; | ||
if(ofmt !== 'arr') return data.toString('binary'); | ||
return [].slice.call(data); | ||
} | ||
var len = data.length, w = 0, ww = 0, j = 0; | ||
@@ -240,3 +247,3 @@ var direct = typeof data === "string"; | ||
} | ||
if(ofmt === undefined || ofmt === 'buf') return mdb.slice(0,j); | ||
if(!ofmt || ofmt === 'buf') return mdb.slice(0,j); | ||
if(ofmt !== 'arr') return mdb.slice(0,j).toString('binary'); | ||
@@ -250,3 +257,3 @@ return [].slice.call(mdb, 0, j); | ||
if(cpdcache[sbcs_cache[0]]) return; | ||
var i, s; | ||
var i=0, s=0; | ||
for(i = 0; i < sbcs_cache.length; ++i) { | ||
@@ -273,3 +280,4 @@ s = sbcs_cache[i]; | ||
}; | ||
var cp_decache = function cp_decache(cp) { cpdcache[cp] = cpecache[cp] = undefined; }; | ||
var null_enc = function(data, ofmt) { return ""; }; | ||
var cp_decache = function cp_decache(cp) { delete cpdcache[cp]; delete cpecache[cp]; }; | ||
var decache = function decache() { | ||
@@ -282,3 +290,3 @@ if(has_buf) { | ||
} | ||
last_enc = last_cp = undefined; | ||
last_enc = null_enc; last_cp = 0; | ||
}; | ||
@@ -296,17 +304,16 @@ var cache = { | ||
var SetD = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"; | ||
var last_enc, last_cp; | ||
var last_enc = null_enc, last_cp = 0; | ||
var encode = function encode(cp, data, ofmt) { | ||
if(cp === last_cp) { return last_enc(data, ofmt); } | ||
if(cpecache[cp] !== undefined) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); } | ||
if(cp === last_cp && last_enc) { return last_enc(data, ofmt); } | ||
if(cpecache[cp]) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); } | ||
if(has_buf && Buffer.isBuffer(data)) data = data.toString('utf8'); | ||
var len = data.length; | ||
var out = has_buf ? new Buffer(4*len) : [], w, i, j = 0, c, tt, ww; | ||
var C = cpt[cp], E, M; | ||
var out = has_buf ? new Buffer(4*len) : [], w=0, i=0, j = 0, ww=0; | ||
var C = cpt[cp], E, M = ""; | ||
if(C && (E=C.enc)) for(i = 0; i < len; ++i, ++j) { | ||
w = E[data[i]]; | ||
out[j] = w&255; | ||
if(w > 255) { | ||
out[j] = w>>8; | ||
out[++j] = w&255; | ||
} | ||
} else out[j] = w&255; | ||
} | ||
@@ -376,3 +383,3 @@ else if((M=magic[cp])) switch(M) { | ||
out[j+1] = w&255; w >>= 8; | ||
out[j] = w&255; w >>= 8; | ||
out[j] = w&255; | ||
j+=4; | ||
@@ -383,6 +390,6 @@ } | ||
for(i = 0; i < len; i++) { | ||
c = data[i]; | ||
var c = data[i]; | ||
if(c === "+") { out[j++] = 0x2b; out[j++] = 0x2d; continue; } | ||
if(SetD.indexOf(c) > -1) { out[j++] = c.charCodeAt(0); continue; } | ||
tt = encode(1201, c); | ||
var tt = encode(1201, c); | ||
out[j++] = 0x2b; | ||
@@ -399,4 +406,4 @@ out[j++] = BM.charCodeAt(tt[0]>>2); | ||
out = out.slice(0,j); | ||
if(typeof Buffer === 'undefined') return (ofmt == 'str') ? out.map(sfcc).join("") : out; | ||
if(ofmt === undefined || ofmt === 'buf') return out; | ||
if(!has_buf) return (ofmt == 'str') ? (out).map(sfcc).join("") : out; | ||
if(!ofmt || ofmt === 'buf') return out; | ||
if(ofmt !== 'arr') return out.toString('binary'); | ||
@@ -407,15 +414,15 @@ return [].slice.call(out); | ||
var F; if((F=cpdcache[cp])) return F(data); | ||
var len = data.length, out = new Array(len), w, i, j = 1, k = 0, ww; | ||
var C = cpt[cp], D, M; | ||
if(typeof data === "string") return decode(cp, data.split("").map(cca)); | ||
var len = data.length, out = new Array(len), s="", w=0, i=0, j=1, k=0, ww=0; | ||
var C = cpt[cp], D, M=""; | ||
if(C && (D=C.dec)) { | ||
if(typeof data === "string") data = data.split("").map(cca); | ||
for(i = 0; i < len; i+=j) { | ||
j = 2; | ||
w = D[(data[i]<<8)+ data[i+1]]; | ||
if(!w) { | ||
s = D[(data[i]<<8)+ data[i+1]]; | ||
if(!s) { | ||
j = 1; | ||
w = D[data[i]]; | ||
s = D[data[i]]; | ||
} | ||
if(!w) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]); | ||
out[k++] = w; | ||
if(!s) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]); | ||
out[k++] = s; | ||
} | ||
@@ -425,3 +432,2 @@ } | ||
case "utf8": | ||
i = 0; | ||
if(len >= 3 && data[0] == 0xEF) if(data[1] == 0xBB && data[2] == 0xBF) i = 3; | ||
@@ -446,7 +452,6 @@ for(; i < len; i+=j) { | ||
case "utf16le": | ||
i = 0; | ||
if(len >= 2 && data[0] == 0xFF) if(data[1] == 0xFE) i = 2; | ||
if(has_buf && Buffer.isBuffer(data)) return data.toString(M); | ||
j = 2; | ||
for(; i < len; i+=j) { | ||
for(; i+1 < len; i+=j) { | ||
out[k++] = String.fromCharCode((data[i+1]<<8) + data[i]); | ||
@@ -456,6 +461,5 @@ } | ||
case "utf16be": | ||
i = 0; | ||
if(len >= 2 && data[0] == 0xFE) if(data[1] == 0xFF) i = 2; | ||
j = 2; | ||
for(; i < len; i+=j) { | ||
for(; i+1 < len; i+=j) { | ||
out[k++] = String.fromCharCode((data[i]<<8) + data[i+1]); | ||
@@ -465,3 +469,2 @@ } | ||
case "utf32le": | ||
i = 0; | ||
if(len >= 4 && data[0] == 0xFF) if(data[1] == 0xFE && data[2] === 0 && data[3] === 0) i = 4; | ||
@@ -480,3 +483,2 @@ j = 4; | ||
case "utf32be": | ||
i = 0; | ||
if(len >= 4 && data[3] == 0xFF) if(data[2] == 0xFE && data[1] === 0 && data[0] === 0) i = 4; | ||
@@ -495,3 +497,2 @@ j = 4; | ||
case "utf7": | ||
i = 0; | ||
if(len >= 4 && data[0] == 0x2B && data[1] == 0x2F && data[2] == 0x76) { | ||
@@ -509,5 +510,5 @@ if(len >= 5 && data[3] == 0x38 && data[4] == 0x2D) i = 5; | ||
var tt = []; | ||
var o64; | ||
var c1, c2, c3; | ||
var e1, e2, e3, e4; | ||
var o64 = ""; | ||
var c1=0, c2=0, c3=0; | ||
var e1=0, e2=0, e3=0, e4=0; | ||
for(var l = 1; l < j - dash;) { | ||
@@ -527,5 +528,4 @@ e1 = BM.indexOf(String.fromCharCode(data[i+l++])); | ||
} | ||
if((tt.length & 1) === 1) tt.length--; | ||
o64 = decode(1201, tt); | ||
for(l = 0; l < o64.length; ++l) out[k++] = o64[l]; | ||
for(l = 0; l < o64.length; ++l) out[k++] = o64.charAt(l); | ||
} | ||
@@ -538,5 +538,5 @@ break; | ||
}; | ||
var hascp = function hascp(cp) { return cpt[cp] || magic[cp]; }; | ||
var hascp = function hascp(cp) { return !!(cpt[cp] || magic[cp]); }; | ||
cpt.utils = { decode: decode, encode: encode, hascp: hascp, magic: magic, cache:cache }; | ||
return cpt; | ||
})); |
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */ | ||
/* vim: set ft=javascript: */ | ||
/*jshint newcap: false */ | ||
(function(root, factory){ | ||
(function(root, factory) { | ||
"use strict"; | ||
@@ -30,7 +31,7 @@ if(typeof cptable === "undefined") { | ||
var magic_encode = {}; | ||
var cpdcache = {}; | ||
var cpecache = {}; | ||
var cpdcache = {}; | ||
var sfcc = function sfcc(x) { return String.fromCharCode(x); }; | ||
var cca = function cca(x){ return x.charCodeAt(0); }; | ||
var cca = function cca(x) { return x.charCodeAt(0); }; | ||
@@ -54,8 +55,8 @@ var has_buf = (typeof Buffer !== 'undefined'); | ||
var len = data.length; | ||
var out, i, j, D, w; | ||
var out, i=0, j=0, D=0, w=0; | ||
if(typeof data === 'string') { | ||
out = Buffer(len); | ||
out = new Buffer(len); | ||
for(i = 0; i < len; ++i) out[i] = EE[data.charCodeAt(i)]; | ||
} else if(Buffer.isBuffer(data)) { | ||
out = Buffer(2*len); | ||
out = new Buffer(2*len); | ||
j = 0; | ||
@@ -75,6 +76,6 @@ for(i = 0; i < len; ++i) { | ||
} else { | ||
out = Buffer(len); | ||
out = new Buffer(len); | ||
for(i = 0; i < len; ++i) out[i] = EE[data[i].charCodeAt(0)]; | ||
} | ||
if(ofmt === undefined || ofmt === 'buf') return out; | ||
if(!ofmt || ofmt === 'buf') return out; | ||
if(ofmt !== 'arr') return out.toString('binary'); | ||
@@ -86,3 +87,3 @@ return [].slice.call(out); | ||
var D = cpt[cp].dec; | ||
var DD = new Buffer(131072), d=0, c; | ||
var DD = new Buffer(131072), d=0, c=""; | ||
for(d=0;d<D.length;++d) { | ||
@@ -94,3 +95,3 @@ if(!(c=D[d])) continue; | ||
return function sbcs_d(data) { | ||
var len = data.length, i=0, j; | ||
var len = data.length, i=0, j=0; | ||
if(2 * len > mdl) { mdl = 2 * len; mdb = new Buffer(mdl); } | ||
@@ -127,3 +128,3 @@ if(Buffer.isBuffer(data)) { | ||
return function dbcs_e(data, ofmt) { | ||
var len = data.length, out = new Buffer(2*len), i, j, jj, k, D; | ||
var len = data.length, out = new Buffer(2*len), i=0, j=0, jj=0, k=0, D=0; | ||
if(typeof data === 'string') { | ||
@@ -155,3 +156,3 @@ for(i = k = 0; i < len; ++i) { | ||
} | ||
if(ofmt === undefined || ofmt === 'buf') return out; | ||
if(!ofmt || ofmt === 'buf') return out; | ||
if(ofmt !== 'arr') return out.toString('binary'); | ||
@@ -172,3 +173,3 @@ return [].slice.call(out); | ||
return function dbcs_d(data) { | ||
var len = data.length, out = new Buffer(2*len), i, j, k=0; | ||
var len = data.length, out = new Buffer(2*len), i=0, j=0, k=0; | ||
if(Buffer.isBuffer(data)) { | ||
@@ -197,2 +198,3 @@ for(i = 0; i < len; i++) { | ||
magic_decode[65001] = function utf8_d(data) { | ||
if(typeof data === "string") return utf8_d(data.split("").map(cca)); | ||
var len = data.length, w = 0, ww = 0; | ||
@@ -217,2 +219,7 @@ if(4 * len > mdl) { mdl = 4 * len; mdb = new Buffer(mdl); } | ||
magic_encode[65001] = function utf8_e(data, ofmt) { | ||
if(has_buf && Buffer.isBuffer(data)) { | ||
if(!ofmt || ofmt === 'buf') return data; | ||
if(ofmt !== 'arr') return data.toString('binary'); | ||
return [].slice.call(data); | ||
} | ||
var len = data.length, w = 0, ww = 0, j = 0; | ||
@@ -240,3 +247,3 @@ var direct = typeof data === "string"; | ||
} | ||
if(ofmt === undefined || ofmt === 'buf') return mdb.slice(0,j); | ||
if(!ofmt || ofmt === 'buf') return mdb.slice(0,j); | ||
if(ofmt !== 'arr') return mdb.slice(0,j).toString('binary'); | ||
@@ -250,3 +257,3 @@ return [].slice.call(mdb, 0, j); | ||
if(cpdcache[sbcs_cache[0]]) return; | ||
var i, s; | ||
var i=0, s=0; | ||
for(i = 0; i < sbcs_cache.length; ++i) { | ||
@@ -273,3 +280,4 @@ s = sbcs_cache[i]; | ||
}; | ||
var cp_decache = function cp_decache(cp) { cpdcache[cp] = cpecache[cp] = undefined; }; | ||
var null_enc = function(data, ofmt) { return ""; }; | ||
var cp_decache = function cp_decache(cp) { delete cpdcache[cp]; delete cpecache[cp]; }; | ||
var decache = function decache() { | ||
@@ -282,3 +290,3 @@ if(has_buf) { | ||
} | ||
last_enc = last_cp = undefined; | ||
last_enc = null_enc; last_cp = 0; | ||
}; | ||
@@ -296,17 +304,16 @@ var cache = { | ||
var SetD = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"; | ||
var last_enc, last_cp; | ||
var last_enc = null_enc, last_cp = 0; | ||
var encode = function encode(cp, data, ofmt) { | ||
if(cp === last_cp) { return last_enc(data, ofmt); } | ||
if(cpecache[cp] !== undefined) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); } | ||
if(cp === last_cp && last_enc) { return last_enc(data, ofmt); } | ||
if(cpecache[cp]) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); } | ||
if(has_buf && Buffer.isBuffer(data)) data = data.toString('utf8'); | ||
var len = data.length; | ||
var out = has_buf ? new Buffer(4*len) : [], w, i, j = 0, c, tt, ww; | ||
var C = cpt[cp], E, M; | ||
var out = has_buf ? new Buffer(4*len) : [], w=0, i=0, j = 0, ww=0; | ||
var C = cpt[cp], E, M = ""; | ||
if(C && (E=C.enc)) for(i = 0; i < len; ++i, ++j) { | ||
w = E[data[i]]; | ||
out[j] = w&255; | ||
if(w > 255) { | ||
out[j] = w>>8; | ||
out[++j] = w&255; | ||
} | ||
} else out[j] = w&255; | ||
} | ||
@@ -376,3 +383,3 @@ else if((M=magic[cp])) switch(M) { | ||
out[j+1] = w&255; w >>= 8; | ||
out[j] = w&255; w >>= 8; | ||
out[j] = w&255; | ||
j+=4; | ||
@@ -383,6 +390,6 @@ } | ||
for(i = 0; i < len; i++) { | ||
c = data[i]; | ||
var c = data[i]; | ||
if(c === "+") { out[j++] = 0x2b; out[j++] = 0x2d; continue; } | ||
if(SetD.indexOf(c) > -1) { out[j++] = c.charCodeAt(0); continue; } | ||
tt = encode(1201, c); | ||
var tt = encode(1201, c); | ||
out[j++] = 0x2b; | ||
@@ -399,4 +406,4 @@ out[j++] = BM.charCodeAt(tt[0]>>2); | ||
out = out.slice(0,j); | ||
if(typeof Buffer === 'undefined') return (ofmt == 'str') ? out.map(sfcc).join("") : out; | ||
if(ofmt === undefined || ofmt === 'buf') return out; | ||
if(!has_buf) return (ofmt == 'str') ? (out).map(sfcc).join("") : out; | ||
if(!ofmt || ofmt === 'buf') return out; | ||
if(ofmt !== 'arr') return out.toString('binary'); | ||
@@ -407,15 +414,15 @@ return [].slice.call(out); | ||
var F; if((F=cpdcache[cp])) return F(data); | ||
var len = data.length, out = new Array(len), w, i, j = 1, k = 0, ww; | ||
var C = cpt[cp], D, M; | ||
if(typeof data === "string") return decode(cp, data.split("").map(cca)); | ||
var len = data.length, out = new Array(len), s="", w=0, i=0, j=1, k=0, ww=0; | ||
var C = cpt[cp], D, M=""; | ||
if(C && (D=C.dec)) { | ||
if(typeof data === "string") data = data.split("").map(cca); | ||
for(i = 0; i < len; i+=j) { | ||
j = 2; | ||
w = D[(data[i]<<8)+ data[i+1]]; | ||
if(!w) { | ||
s = D[(data[i]<<8)+ data[i+1]]; | ||
if(!s) { | ||
j = 1; | ||
w = D[data[i]]; | ||
s = D[data[i]]; | ||
} | ||
if(!w) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]); | ||
out[k++] = w; | ||
if(!s) throw new Error('Unrecognized code: ' + data[i] + ' ' + data[i+j-1] + ' ' + i + ' ' + j + ' ' + D[data[i]]); | ||
out[k++] = s; | ||
} | ||
@@ -425,3 +432,2 @@ } | ||
case "utf8": | ||
i = 0; | ||
if(len >= 3 && data[0] == 0xEF) if(data[1] == 0xBB && data[2] == 0xBF) i = 3; | ||
@@ -446,7 +452,6 @@ for(; i < len; i+=j) { | ||
case "utf16le": | ||
i = 0; | ||
if(len >= 2 && data[0] == 0xFF) if(data[1] == 0xFE) i = 2; | ||
if(has_buf && Buffer.isBuffer(data)) return data.toString(M); | ||
j = 2; | ||
for(; i < len; i+=j) { | ||
for(; i+1 < len; i+=j) { | ||
out[k++] = String.fromCharCode((data[i+1]<<8) + data[i]); | ||
@@ -456,6 +461,5 @@ } | ||
case "utf16be": | ||
i = 0; | ||
if(len >= 2 && data[0] == 0xFE) if(data[1] == 0xFF) i = 2; | ||
j = 2; | ||
for(; i < len; i+=j) { | ||
for(; i+1 < len; i+=j) { | ||
out[k++] = String.fromCharCode((data[i]<<8) + data[i+1]); | ||
@@ -465,3 +469,2 @@ } | ||
case "utf32le": | ||
i = 0; | ||
if(len >= 4 && data[0] == 0xFF) if(data[1] == 0xFE && data[2] === 0 && data[3] === 0) i = 4; | ||
@@ -480,3 +483,2 @@ j = 4; | ||
case "utf32be": | ||
i = 0; | ||
if(len >= 4 && data[3] == 0xFF) if(data[2] == 0xFE && data[1] === 0 && data[0] === 0) i = 4; | ||
@@ -495,3 +497,2 @@ j = 4; | ||
case "utf7": | ||
i = 0; | ||
if(len >= 4 && data[0] == 0x2B && data[1] == 0x2F && data[2] == 0x76) { | ||
@@ -509,5 +510,5 @@ if(len >= 5 && data[3] == 0x38 && data[4] == 0x2D) i = 5; | ||
var tt = []; | ||
var o64; | ||
var c1, c2, c3; | ||
var e1, e2, e3, e4; | ||
var o64 = ""; | ||
var c1=0, c2=0, c3=0; | ||
var e1=0, e2=0, e3=0, e4=0; | ||
for(var l = 1; l < j - dash;) { | ||
@@ -527,5 +528,4 @@ e1 = BM.indexOf(String.fromCharCode(data[i+l++])); | ||
} | ||
if((tt.length & 1) === 1) tt.length--; | ||
o64 = decode(1201, tt); | ||
for(l = 0; l < o64.length; ++l) out[k++] = o64[l]; | ||
for(l = 0; l < o64.length; ++l) out[k++] = o64.charAt(l); | ||
} | ||
@@ -538,5 +538,5 @@ break; | ||
}; | ||
var hascp = function hascp(cp) { return cpt[cp] || magic[cp]; }; | ||
var hascp = function hascp(cp) { return !!(cpt[cp] || magic[cp]); }; | ||
cpt.utils = { decode: decode, encode: encode, hascp: hascp, magic: magic, cache:cache }; | ||
return cpt; | ||
})); |
{ | ||
"name": "codepage", | ||
"version": "1.4.0", | ||
"version": "1.5.0", | ||
"author": "SheetJS", | ||
@@ -36,3 +36,3 @@ "description": "pure-JS library to handle codepages", | ||
"blanket": { | ||
"pattern": "[cptable.js,cputils.js,cpexcel.js]" | ||
"pattern": "[cputils.js]" | ||
} | ||
@@ -39,0 +39,0 @@ }, |
362
README.md
@@ -50,17 +50,23 @@ # Codepages for JS | ||
The codepages are indexed by number. To get the unicode character for a given | ||
Most codepages are indexed by number. To get the unicode character for a given | ||
codepoint, use the `dec` property: | ||
var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ | ||
```js | ||
var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ | ||
``` | ||
To get the codepoint for a given character, use the `enc` property: | ||
var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255 | ||
```js | ||
var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255 | ||
``` | ||
There are a few utilities that deal with strings and buffers: | ||
var 汇总 = cptable.utils.decode(936, [0xbb,0xe3,0xd7,0xdc]); | ||
var buf = cptable.utils.encode(936, 汇总); | ||
var sushi= cptable.utils.decode(65001, [0xf0,0x9f,0x8d,0xa3]); // 🍣 | ||
var sbuf = cptable.utils.encode(65001, sushi); | ||
```js | ||
var 汇总 = cptable.utils.decode(936, [0xbb,0xe3,0xd7,0xdc]); | ||
var buf = cptable.utils.encode(936, 汇总); | ||
var sushi= cptable.utils.decode(65001, [0xf0,0x9f,0x8d,0xa3]); // 🍣 | ||
var sbuf = cptable.utils.encode(65001, sushi); | ||
``` | ||
@@ -71,3 +77,3 @@ `cptable.utils.encode(CP, data, ofmt)` accepts a String or Array of characters | ||
- Default output is a Buffer (or Array) of bytes (integers between 0 and 255). | ||
- If `ofmt == 'str'`, return a String where `o.charCodeAt(i)` is the ith byte | ||
- If `ofmt == 'str'`, return a String where `o.charCodeAt(i)` is the `i`-th byte | ||
- If `ofmt == 'arr'`, return an Array of bytes | ||
@@ -84,3 +90,5 @@ | ||
var cptable = require('codepage/dist/cpexcel.full'); | ||
```js | ||
var cptable = require('codepage/dist/cpexcel.full'); | ||
``` | ||
@@ -93,3 +101,5 @@ ## Rolling your own script | ||
bash make.sh path_to_manifest output_file_name JSVAR | ||
```bash | ||
bash make.sh path_to_manifest output_file_name JSVAR | ||
``` | ||
@@ -126,167 +136,183 @@ where | ||
Some codepages are easier to implement algorithmically. Since these are | ||
hardcoded in utils, there is no corresponding entry (they are "magic") | ||
hardcoded in `utils`, there is no corresponding entry (they are "magic"). | ||
| CP# | Information | Description | | ||
| --: | :----------: | :---------- | | ||
| 37| unicode.org |IBM EBCDIC US-Canada | ||
| 437| unicode.org |OEM United States | ||
| 500| unicode.org |IBM EBCDIC International | ||
| 620| NLS |Mazovia (Polish) MS-DOS | ||
| 708|MakeEncoding.cs|Arabic (ASMO 708) | ||
| 720|MakeEncoding.cs|Arabic (Transparent ASMO); Arabic (DOS) | ||
| 737| unicode.org |OEM Greek (formerly 437G); Greek (DOS) | ||
| 775| unicode.org |OEM Baltic; Baltic (DOS) | ||
| 850| unicode.org |OEM Multilingual Latin 1; Western European (DOS) | ||
| 852| unicode.org |OEM Latin 2; Central European (DOS) | ||
| 855| unicode.org |OEM Cyrillic (primarily Russian) | ||
| 857| unicode.org |OEM Turkish; Turkish (DOS) | ||
| 858|MakeEncoding.cs|OEM Multilingual Latin 1 + Euro symbol | ||
| 860| unicode.org |OEM Portuguese; Portuguese (DOS) | ||
| 861| unicode.org |OEM Icelandic; Icelandic (DOS) | ||
| 862| unicode.org |OEM Hebrew; Hebrew (DOS) | ||
| 863| unicode.org |OEM French Canadian; French Canadian (DOS) | ||
| 864| unicode.org |OEM Arabic; Arabic (864) | ||
| 865| unicode.org |OEM Nordic; Nordic (DOS) | ||
| 866| unicode.org |OEM Russian; Cyrillic (DOS) | ||
| 869| unicode.org |OEM Modern Greek; Greek, Modern (DOS) | ||
| 870|MakeEncoding.cs|IBM EBCDIC Multilingual/ROECE (Latin 2) | ||
| 874| unicode.org |Windows Thai | ||
| 875| unicode.org |IBM EBCDIC Greek Modern | ||
| 895| NLS |Kamenický (Czech) MS-DOS | ||
| 932| unicode.org |Japanese Shift-JIS | ||
| 936| unicode.org |Simplified Chinese GBK | ||
| 949| unicode.org |Korean | ||
| 950| unicode.org |Traditional Chinese Big5 | ||
| 1026| unicode.org |IBM EBCDIC Turkish (Latin 5) | ||
| 1047|MakeEncoding.cs|IBM EBCDIC Latin 1/Open System | ||
| 1140|MakeEncoding.cs|IBM EBCDIC US-Canada (037 + Euro symbol) | ||
| 1141|MakeEncoding.cs|IBM EBCDIC Germany (20273 + Euro symbol) | ||
| 1142|MakeEncoding.cs|IBM EBCDIC Denmark-Norway (20277 + Euro symbol) | ||
| 1143|MakeEncoding.cs|IBM EBCDIC Finland-Sweden (20278 + Euro symbol) | ||
| 1144|MakeEncoding.cs|IBM EBCDIC Italy (20280 + Euro symbol) | ||
| 1145|MakeEncoding.cs|IBM EBCDIC Latin America-Spain (20284 + Euro symbol) | ||
| 1146|MakeEncoding.cs|IBM EBCDIC United Kingdom (20285 + Euro symbol) | ||
| 1147|MakeEncoding.cs|IBM EBCDIC France (20297 + Euro symbol) | ||
| 1148|MakeEncoding.cs|IBM EBCDIC International (500 + Euro symbol) | ||
| 1149|MakeEncoding.cs|IBM EBCDIC Icelandic (20871 + Euro symbol) | ||
| 1200| magic |Unicode UTF-16, little endian (BMP of ISO 10646) | ||
| 1201| magic |Unicode UTF-16, big endian | ||
| 1250| unicode.org |Windows Central Europe | ||
| 1251| unicode.org |Windows Cyrillic | ||
| 1252| unicode.org |Windows Latin I | ||
| 1253| unicode.org |Windows Greek | ||
| 1254| unicode.org |Windows Turkish | ||
| 1255| unicode.org |Windows Hebrew | ||
| 1256| unicode.org |Windows Arabic | ||
| 1257| unicode.org |Windows Baltic | ||
| 1258| unicode.org |Windows Vietnam | ||
| 1361|MakeEncoding.cs|Korean (Johab) | ||
|10000| unicode.org |MAC Roman | ||
|10001|MakeEncoding.cs|Japanese (Mac) | ||
|10002|MakeEncoding.cs|MAC Traditional Chinese (Big5) | ||
|10003|MakeEncoding.cs|Korean (Mac) | ||
|10004|MakeEncoding.cs|Arabic (Mac) | ||
|10005|MakeEncoding.cs|Hebrew (Mac) | ||
|10006| unicode.org |Greek (Mac) | ||
|10007| unicode.org |Cyrillic (Mac) | ||
|10008|MakeEncoding.cs|MAC Simplified Chinese (GB 2312) | ||
|10010|MakeEncoding.cs|Romanian (Mac) | ||
|10017|MakeEncoding.cs|Ukrainian (Mac) | ||
|10021|MakeEncoding.cs|Thai (Mac) | ||
|10029| unicode.org |MAC Latin 2 (Central European) | ||
|10079| unicode.org |Icelandic (Mac) | ||
|10081| unicode.org |Turkish (Mac) | ||
|10082|MakeEncoding.cs|Croatian (Mac) | ||
|12000| magic |Unicode UTF-32, little endian byte order | ||
|12001| magic |Unicode UTF-32, big endian byte order | ||
|20000|MakeEncoding.cs|CNS Taiwan (Chinese Traditional) | ||
|20001|MakeEncoding.cs|TCA Taiwan | ||
|20002|MakeEncoding.cs|Eten Taiwan (Chinese Traditional) | ||
|20003|MakeEncoding.cs|IBM5550 Taiwan | ||
|20004|MakeEncoding.cs|TeleText Taiwan | ||
|20005|MakeEncoding.cs|Wang Taiwan | ||
|20105|MakeEncoding.cs|Western European IA5 (IRV International Alphabet 5) 7-bit | ||
|20106|MakeEncoding.cs|IA5 German (7-bit) | ||
|20107|MakeEncoding.cs|IA5 Swedish (7-bit) | ||
|20108|MakeEncoding.cs|IA5 Norwegian (7-bit) | ||
|20127| magic |US-ASCII (7-bit) | ||
|20261|MakeEncoding.cs|T.61 | ||
|20269|MakeEncoding.cs|ISO 6937 Non-Spacing Accent | ||
|20273|MakeEncoding.cs|IBM EBCDIC Germany | ||
|20277|MakeEncoding.cs|IBM EBCDIC Denmark-Norway | ||
|20278|MakeEncoding.cs|IBM EBCDIC Finland-Sweden | ||
|20280|MakeEncoding.cs|IBM EBCDIC Italy | ||
|20284|MakeEncoding.cs|IBM EBCDIC Latin America-Spain | ||
|20285|MakeEncoding.cs|IBM EBCDIC United Kingdom | ||
|20290|MakeEncoding.cs|IBM EBCDIC Japanese Katakana Extended | ||
|20297|MakeEncoding.cs|IBM EBCDIC France | ||
|20420|MakeEncoding.cs|IBM EBCDIC Arabic | ||
|20423|MakeEncoding.cs|IBM EBCDIC Greek | ||
|20424|MakeEncoding.cs|IBM EBCDIC Hebrew | ||
|20833|MakeEncoding.cs|IBM EBCDIC Korean Extended | ||
|20838|MakeEncoding.cs|IBM EBCDIC Thai | ||
|20866|MakeEncoding.cs|Russian Cyrillic (KOI8-R) | ||
|20871|MakeEncoding.cs|IBM EBCDIC Icelandic | ||
|20880|MakeEncoding.cs|IBM EBCDIC Cyrillic Russian | ||
|20905|MakeEncoding.cs|IBM EBCDIC Turkish | ||
|20924|MakeEncoding.cs|IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) | ||
|20932|MakeEncoding.cs|Japanese (JIS 0208-1990 and 0212-1990) | ||
|20936|MakeEncoding.cs|Simplified Chinese (GB2312-80) | ||
|20949|MakeEncoding.cs|Korean Wansung | ||
|21025|MakeEncoding.cs|IBM EBCDIC Cyrillic Serbian-Bulgarian | ||
|21027| NLS |Extended/Ext Alpha Lowercase | ||
|21866|MakeEncoding.cs|Ukrainian Cyrillic (KOI8-U) | ||
|28591| unicode.org |ISO 8859-1 Latin 1 (Western European) | ||
|28592| unicode.org |ISO 8859-2 Latin 2 (Central European) | ||
|28593| unicode.org |ISO 8859-3 Latin 3 | ||
|28594| unicode.org |ISO 8859-4 Baltic | ||
|28595| unicode.org |ISO 8859-5 Cyrillic | ||
|28596| unicode.org |ISO 8859-6 Arabic | ||
|28597| unicode.org |ISO 8859-7 Greek | ||
|28598| unicode.org |ISO 8859-8 Hebrew (ISO-Visual) | ||
|28599| unicode.org |ISO 8859-9 Turkish | ||
|28600| unicode.org |ISO 8859-10 Latin 6 | ||
|28601| unicode.org |ISO 8859-11 Latin (Thai) | ||
|28603| unicode.org |ISO 8859-13 Latin 7 (Estonian) | ||
|28604| unicode.org |ISO 8859-14 Latin 8 (Celtic) | ||
|28605| unicode.org |ISO 8859-15 Latin 9 | ||
|28606| unicode.org |ISO 8859-15 Latin 10 | ||
|29001|MakeEncoding.cs|Europa 3 | ||
|38598|MakeEncoding.cs|ISO 8859-8 Hebrew (ISO-Logical) | ||
|50220|MakeEncoding.cs|ISO 2022 JIS Japanese with no halfwidth Katakana | ||
|50221|MakeEncoding.cs|ISO 2022 JIS Japanese with halfwidth Katakana | ||
|50222|MakeEncoding.cs|ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI) | ||
|50225|MakeEncoding.cs|ISO 2022 Korean | ||
|50227|MakeEncoding.cs|ISO 2022 Simplified Chinese | ||
|51932|MakeEncoding.cs|EUC Japanese | ||
|51936|MakeEncoding.cs|EUC Simplified Chinese | ||
|51949|MakeEncoding.cs|EUC Korean | ||
|52936|MakeEncoding.cs|HZ-GB2312 Simplified Chinese | ||
|54936|MakeEncoding.cs|GB18030 Simplified Chinese (4 byte) | ||
|57002|MakeEncoding.cs|ISCII Devanagari | ||
|57003|MakeEncoding.cs|ISCII Bengali | ||
|57004|MakeEncoding.cs|ISCII Tamil | ||
|57005|MakeEncoding.cs|ISCII Telugu | ||
|57006|MakeEncoding.cs|ISCII Assamese | ||
|57007|MakeEncoding.cs|ISCII Oriya | ||
|57008|MakeEncoding.cs|ISCII Kannada | ||
|57009|MakeEncoding.cs|ISCII Malayalam | ||
|57010|MakeEncoding.cs|ISCII Gujarati | ||
|57011|MakeEncoding.cs|ISCII Punjabi | ||
|65000| magic |Unicode (UTF-7) | ||
|65001| magic |Unicode (UTF-8) | ||
| CP# | Source | Description | | ||
|--------:|:-----------:|:-----------------------------------------------------| | ||
| ` 37` | unicode.org | IBM EBCDIC US-Canada | | ||
| ` 437` | unicode.org | OEM United States | | ||
| ` 500` | unicode.org | IBM EBCDIC International | | ||
| ` 620` | NLS | Mazovia (Polish) MS-DOS | | ||
| ` 708` | Windows 7 | Arabic (ASMO 708) | | ||
| ` 720` | Windows 7 | Arabic (Transparent ASMO); Arabic (DOS) | | ||
| ` 737` | unicode.org | OEM Greek (formerly 437G); Greek (DOS) | | ||
| ` 775` | unicode.org | OEM Baltic; Baltic (DOS) | | ||
| ` 808` | unicode.org | OEM Russian; Cyrillic + Euro symbol | | ||
| ` 850` | unicode.org | OEM Multilingual Latin 1; Western European (DOS) | | ||
| ` 852` | unicode.org | OEM Latin 2; Central European (DOS) | | ||
| ` 855` | unicode.org | OEM Cyrillic (primarily Russian) | | ||
| ` 857` | unicode.org | OEM Turkish; Turkish (DOS) | | ||
| ` 858` | Windows 7 | OEM Multilingual Latin 1 + Euro symbol | | ||
| ` 860` | unicode.org | OEM Portuguese; Portuguese (DOS) | | ||
| ` 861` | unicode.org | OEM Icelandic; Icelandic (DOS) | | ||
| ` 862` | unicode.org | OEM Hebrew; Hebrew (DOS) | | ||
| ` 863` | unicode.org | OEM French Canadian; French Canadian (DOS) | | ||
| ` 864` | unicode.org | OEM Arabic; Arabic (864) | | ||
| ` 865` | unicode.org | OEM Nordic; Nordic (DOS) | | ||
| ` 866` | unicode.org | OEM Russian; Cyrillic (DOS) | | ||
| ` 869` | unicode.org | OEM Modern Greek; Greek, Modern (DOS) | | ||
| ` 870` | Windows 7 | IBM EBCDIC Multilingual/ROECE (Latin 2) | | ||
| ` 872` | unicode.org | OEM Cyrillic (primarily Russian) + Euro Symbol | | ||
| ` 874` | unicode.org | Windows Thai | | ||
| ` 875` | unicode.org | IBM EBCDIC Greek Modern | | ||
| ` 895` | NLS | Kamenický (Czech) MS-DOS | | ||
| ` 932` | unicode.org | Japanese Shift-JIS | | ||
| ` 936` | unicode.org | Simplified Chinese GBK | | ||
| ` 949` | unicode.org | Korean | | ||
| ` 950` | unicode.org | Traditional Chinese Big5 | | ||
| ` 1010` | IBM | IBM EBCDIC French | | ||
| ` 1026` | unicode.org | IBM EBCDIC Turkish (Latin 5) | | ||
| ` 1047` | Windows 7 | IBM EBCDIC Latin 1/Open System | | ||
| ` 1132` | IBM | IBM EBCDIC Lao (1132 / 1133 / 1341) | | ||
| ` 1140` | Windows 7 | IBM EBCDIC US-Canada (037 + Euro symbol) | | ||
| ` 1141` | Windows 7 | IBM EBCDIC Germany (20273 + Euro symbol) | | ||
| ` 1142` | Windows 7 | IBM EBCDIC Denmark-Norway (20277 + Euro symbol) | | ||
| ` 1143` | Windows 7 | IBM EBCDIC Finland-Sweden (20278 + Euro symbol) | | ||
| ` 1144` | Windows 7 | IBM EBCDIC Italy (20280 + Euro symbol) | | ||
| ` 1145` | Windows 7 | IBM EBCDIC Latin America-Spain (20284 + Euro symbol) | | ||
| ` 1146` | Windows 7 | IBM EBCDIC United Kingdom (20285 + Euro symbol) | | ||
| ` 1147` | Windows 7 | IBM EBCDIC France (20297 + Euro symbol) | | ||
| ` 1148` | Windows 7 | IBM EBCDIC International (500 + Euro symbol) | | ||
| ` 1149` | Windows 7 | IBM EBCDIC Icelandic (20871 + Euro symbol) | | ||
| ` 1200` | magic | Unicode UTF-16, little endian (BMP of ISO 10646) | | ||
| ` 1201` | magic | Unicode UTF-16, big endian | | ||
| ` 1250` | unicode.org | Windows Central Europe | | ||
| ` 1251` | unicode.org | Windows Cyrillic | | ||
| ` 1252` | unicode.org | Windows Latin I | | ||
| ` 1253` | unicode.org | Windows Greek | | ||
| ` 1254` | unicode.org | Windows Turkish | | ||
| ` 1255` | unicode.org | Windows Hebrew | | ||
| ` 1256` | unicode.org | Windows Arabic | | ||
| ` 1257` | unicode.org | Windows Baltic | | ||
| ` 1258` | unicode.org | Windows Vietnam | | ||
| ` 1361` | Windows 7 | Korean (Johab) | | ||
| `10000` | unicode.org | MAC Roman | | ||
| `10001` | Windows 7 | Japanese (Mac) | | ||
| `10002` | Windows 7 | MAC Traditional Chinese (Big5) | | ||
| `10003` | Windows 7 | Korean (Mac) | | ||
| `10004` | Windows 7 | Arabic (Mac) | | ||
| `10005` | Windows 7 | Hebrew (Mac) | | ||
| `10006` | unicode.org | Greek (Mac) | | ||
| `10007` | unicode.org | Cyrillic (Mac) | | ||
| `10008` | Windows 7 | MAC Simplified Chinese (GB 2312) | | ||
| `10010` | Windows 7 | Romanian (Mac) | | ||
| `10017` | Windows 7 | Ukrainian (Mac) | | ||
| `10021` | Windows 7 | Thai (Mac) | | ||
| `10029` | unicode.org | MAC Latin 2 (Central European) | | ||
| `10079` | unicode.org | Icelandic (Mac) | | ||
| `10081` | unicode.org | Turkish (Mac) | | ||
| `10082` | Windows 7 | Croatian (Mac) | | ||
| `12000` | magic | Unicode UTF-32, little endian byte order | | ||
| `12001` | magic | Unicode UTF-32, big endian byte order | | ||
| `20000` | Windows 7 | CNS Taiwan (Chinese Traditional) | | ||
| `20001` | Windows 7 | TCA Taiwan | | ||
| `20002` | Windows 7 | Eten Taiwan (Chinese Traditional) | | ||
| `20003` | Windows 7 | IBM5550 Taiwan | | ||
| `20004` | Windows 7 | TeleText Taiwan | | ||
| `20005` | Windows 7 | Wang Taiwan | | ||
| `20105` | Windows 7 | Western European IA5 (IRV International Alphabet 5) | | ||
| `20106` | Windows 7 | IA5 German (7-bit) | | ||
| `20107` | Windows 7 | IA5 Swedish (7-bit) | | ||
| `20108` | Windows 7 | IA5 Norwegian (7-bit) | | ||
| `20127` | magic | US-ASCII (7-bit) | | ||
| `20261` | Windows 7 | T.61 | | ||
| `20269` | Windows 7 | ISO 6937 Non-Spacing Accent | | ||
| `20273` | Windows 7 | IBM EBCDIC Germany | | ||
| `20277` | Windows 7 | IBM EBCDIC Denmark-Norway | | ||
| `20278` | Windows 7 | IBM EBCDIC Finland-Sweden | | ||
| `20280` | Windows 7 | IBM EBCDIC Italy | | ||
| `20284` | Windows 7 | IBM EBCDIC Latin America-Spain | | ||
| `20285` | Windows 7 | IBM EBCDIC United Kingdom | | ||
| `20290` | Windows 7 | IBM EBCDIC Japanese Katakana Extended | | ||
| `20297` | Windows 7 | IBM EBCDIC France | | ||
| `20420` | Windows 7 | IBM EBCDIC Arabic | | ||
| `20423` | Windows 7 | IBM EBCDIC Greek | | ||
| `20424` | Windows 7 | IBM EBCDIC Hebrew | | ||
| `20833` | Windows 7 | IBM EBCDIC Korean Extended | | ||
| `20838` | Windows 7 | IBM EBCDIC Thai | | ||
| `20866` | Windows 7 | Russian Cyrillic (KOI8-R) | | ||
| `20871` | Windows 7 | IBM EBCDIC Icelandic | | ||
| `20880` | Windows 7 | IBM EBCDIC Cyrillic Russian | | ||
| `20905` | Windows 7 | IBM EBCDIC Turkish | | ||
| `20924` | Windows 7 | IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) | | ||
| `20932` | Windows 7 | Japanese (JIS 0208-1990 and 0212-1990) | | ||
| `20936` | Windows 7 | Simplified Chinese (GB2312-80) | | ||
| `20949` | Windows 7 | Korean Wansung | | ||
| `21025` | Windows 7 | IBM EBCDIC Cyrillic Serbian-Bulgarian | | ||
| `21027` | NLS | Extended/Ext Alpha Lowercase | | ||
| `21866` | Windows 7 | Ukrainian Cyrillic (KOI8-U) | | ||
| `28591` | unicode.org | ISO 8859-1 Latin 1 (Western European) | | ||
| `28592` | unicode.org | ISO 8859-2 Latin 2 (Central European) | | ||
| `28593` | unicode.org | ISO 8859-3 Latin 3 | | ||
| `28594` | unicode.org | ISO 8859-4 Baltic | | ||
| `28595` | unicode.org | ISO 8859-5 Cyrillic | | ||
| `28596` | unicode.org | ISO 8859-6 Arabic | | ||
| `28597` | unicode.org | ISO 8859-7 Greek | | ||
| `28598` | unicode.org | ISO 8859-8 Hebrew (ISO-Visual) | | ||
| `28599` | unicode.org | ISO 8859-9 Turkish | | ||
| `28600` | unicode.org | ISO 8859-10 Latin 6 | | ||
| `28601` | unicode.org | ISO 8859-11 Latin (Thai) | | ||
| `28603` | unicode.org | ISO 8859-13 Latin 7 (Estonian) | | ||
| `28604` | unicode.org | ISO 8859-14 Latin 8 (Celtic) | | ||
| `28605` | unicode.org | ISO 8859-15 Latin 9 | | ||
| `28606` | unicode.org | ISO 8859-16 Latin 10 | ||
| `29001` | Windows 7 | Europa 3 | | ||
| `38598` | Windows 7 | ISO 8859-8 Hebrew (ISO-Logical) | | ||
| `47451` | unicode.org | Atari ST/TT | | ||
| `50220` | Windows 7 | ISO 2022 JIS Japanese with no halfwidth Katakana | | ||
| `50221` | Windows 7 | ISO 2022 JIS Japanese with halfwidth Katakana | | ||
| `50222` | Windows 7 | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI) | ||
| `50225` | Windows 7 | ISO 2022 Korean | | ||
| `50227` | Windows 7 | ISO 2022 Simplified Chinese | | ||
| `51932` | Windows 7 | EUC Japanese | | ||
| `51936` | Windows 7 | EUC Simplified Chinese | | ||
| `51949` | Windows 7 | EUC Korean | | ||
| `52936` | Windows 7 | HZ-GB2312 Simplified Chinese | | ||
| `54936` | Windows 7 | GB18030 Simplified Chinese (4 byte) | | ||
| `57002` | Windows 7 | ISCII Devanagari | | ||
| `57003` | Windows 7 | ISCII Bengali | | ||
| `57004` | Windows 7 | ISCII Tamil | | ||
| `57005` | Windows 7 | ISCII Telugu | | ||
| `57006` | Windows 7 | ISCII Assamese | | ||
| `57007` | Windows 7 | ISCII Oriya | | ||
| `57008` | Windows 7 | ISCII Kannada | | ||
| `57009` | Windows 7 | ISCII Malayalam | | ||
| `57010` | Windows 7 | ISCII Gujarati | | ||
| `57011` | Windows 7 | ISCII Punjabi | | ||
| `65000` | magic | Unicode (UTF-7) | | ||
| `65001` | magic | Unicode (UTF-8) | | ||
Note that MakeEncoding.cs deviates from unicode.org for some codepages. In the | ||
case of direct conflicts, unicode.org takes precedence. In cases where the | ||
unicode.org listing does not prescribe a value, the MakeEncoding.cs value is used. | ||
`unicode.org` refers to the Unicode Consortium Public Mappings, a database of | ||
various mappings between unicode characters and respective character sets. The | ||
tables are processed by a few scripts in the build process. | ||
NLS refers to the National Language Support files supplied in various versions of | ||
Windows. In older versions of Windows (e.g. Windows 98) these files followed the | ||
pattern `CP_#.NLS`, but newer versions use the pattern `C_#.NLS`. | ||
`IBM` refers to the IBM coded character set database. Even though IBM uses a | ||
different numbering scheme from Windows, the IBM numbers are used when there is | ||
no conflict. The tables are manually generated from the symbol PDFs. | ||
`Windows 7` refers to direct inspection of Windows 7 machines using .NET class | ||
`System.Text.Encoding`. The enclosed `MakeEncoding.cs` C# program brute-forces | ||
code pages. MakeEncoding.cs deviates from unicode.org in some cases. When they | ||
map a given code to different characters, the unicode.org value is used. When | ||
unicode.org does not prescribe a value, the MakeEncoding.cs value is used. | ||
`NLS` refers to the National Language Support files supplied in various versions | ||
of Windows. In older versions of Windows (e.g. Windows 98) these files followed | ||
the name pattern `CP_#.NLS`, but newer versions use the name pattern `C_#.NLS`. | ||
## Sources | ||
- [Unicode Consortium Public Mappings](http://www.unicode.org/Public/MAPPINGS/) | ||
- [Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx) | ||
- [Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx) | ||
- [Windows Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx) | ||
- [Windows Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx) | ||
- [IBM Coded Character Sets](https://www-01.ibm.com/software/globalization/ccsid/ccsid_registered.html) | ||
@@ -293,0 +319,0 @@ ## Badges |
Sorry, the diff of this file is too big to display
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
3355627
19210
319