encoding.js
Converts character encoding in JavaScript.
README(Japanese)
Installation
In Browser:
<script src="encoding.js"></script>
or
<script src="encoding.min.js"></script>
The Encoding object will be defined in the global scope.
Conversion and detection work on Array and array-like objects (such as TypedArray).
In Node.js:
encoding.js is published on npm under the module name
encoding-japanese.
npm install encoding-japanese
var encoding = require('encoding-japanese');
Each method is also available for Buffer in Node.js.
bower:
bower install encoding-japanese
Convert character encoding (convert):
- {Array.<number>|string} Encoding.convert ( data, to_encoding [, from_encoding ] )
Converts character encoding.
@param {Array.<number>|TypedArray|Buffer|string} data The target data.
@param {(string|Object)} to_encoding The encoding name of conversion destination.
@param {(string|Array.<string>)=} [from_encoding] The encoding name of source or 'AUTO'.
@return {Array|string} Return the converted array/string.
var utf8Array = new Uint8Array(...) or [...] or Array(...) or Buffer(...);
var sjisArray = Encoding.convert(utf8Array, 'SJIS', 'UTF8');
var sjisArray = Encoding.convert(utf8Array, 'SJIS');
var sjisArray = Encoding.convert(utf8Array, 'SJIS', 'AUTO');
var detected = Encoding.detect(utf8Array);
if (detected === 'UTF8') {
console.log('Encoding is UTF-8');
}
Available Encodings:
- 'UTF32' (detect only)
- 'UTF16'
- 'UTF16BE'
- 'UTF16LE'
- 'BINARY' (detect only)
- 'ASCII' (detect only)
- 'JIS'
- 'UTF8'
- 'EUCJP'
- 'SJIS'
- 'UNICODE' (JavaScript Unicode Array)
Note: UNICODE is an array whose values are the character codes returned by String.prototype.charCodeAt() in JavaScript.
(Each value in the array may be greater than 255.)
Specify the Object argument
var sjisArray = Encoding.convert(utf8Array, {
to: 'SJIS',
from: 'UTF8'
});
Passing an object as the second argument improves readability.
Specify the string argument and 'type' option
var utf8String = 'ã\u0081\u0093ã\u0082\u0093ã\u0081«ã\u0081¡ã\u0081¯';
var unicodeString = Encoding.convert(utf8String, {
to: 'UNICODE',
from: 'UTF8',
type: 'string'
});
console.log(unicodeString);
The 'type' option accepts the following values:
- 'string': Return as string.
- 'arraybuffer': Return as ArrayBuffer.
- 'array': Return as Array (default).
Specify BOM in UTF-16
You can add a UTF-16 BOM by specifying the bom option when converting.
var utf16Array = Encoding.convert(utf8Array, {
to: 'UTF16',
from: 'UTF8',
bom: true
});
The byte order of UTF16 is big-endian by default.
Specify 'LE' for the bom option if you want to convert as little-endian.
var utf16leArray = Encoding.convert(utf8Array, {
to: 'UTF16',
from: 'UTF8',
bom: 'LE'
});
If a BOM is not required, specify UTF16LE or UTF16BE as the target encoding.
var utf16beArray = Encoding.convert(utf8Array, {
to: 'UTF16BE',
from: 'UTF8'
});
Note: UTF16, UTF16BE and UTF16LE are not the JavaScript internal encoding; the result is a byte array.
Detect character encoding (detect):
- {string|boolean} Encoding.detect ( data [, encodings ] )
Detect character encoding.
@param {Array.<number>|TypedArray|string} data Target data
@param {(string|Array.<string>)} [encodings] The encoding name(s) to restrict the detection to.
@return {string|boolean} Return the detected character encoding, or false.
var detected = Encoding.detect(utf8Array);
if (detected === 'UTF8') {
console.log('Encoding is UTF-8');
}
var isSJIS = Encoding.detect(sjisArray, 'SJIS');
if (isSJIS) {
console.log('Encoding is SJIS');
}
URL Encode/Decode:
-
{string} Encoding.urlEncode ( data )
URL(percent) encode.
@param {Array.<number>|TypedArray} data Target data.
@return {string} Return the encoded string.
-
{Array.<number>} Encoding.urlDecode ( string )
URL(percent) decode.
@param {string} string Target data.
@return {Array.<number>} Return the decoded array.
var sjisArray = [
130, 177, 130, 241, 130, 201, 130, 191, 130, 205, 129,
65, 130, 217, 130, 176, 129, 153, 130, 210, 130, 230
];
var encoded = Encoding.urlEncode(sjisArray);
console.log(encoded);
var decoded = Encoding.urlDecode(encoded);
console.log(decoded);
Base64 Encode/Decode:
-
{string} Encoding.base64Encode ( data )
Base64 encode.
@param {Array.<number>|TypedArray} data Target data.
@return {string} Return the Base64 encoded string.
-
{Array.<number>} Encoding.base64Decode ( string )
Base64 decode.
@param {string} string Target data.
@return {Array.<number>} Return the Base64 decoded array.
var sjisArray = [
130, 177, 130, 241, 130, 201, 130, 191, 130, 205
];
var encoded = Encoding.base64Encode(sjisArray);
console.log(encoded);
var decoded = Encoding.base64Decode(encoded);
console.log(decoded);
Example:
Example using the XMLHttpRequest and Typed arrays (Uint8Array):
This sample reads a text file written in Shift_JIS as binary data,
and displays the string converted to Unicode by Encoding.convert.
var req = new XMLHttpRequest();
req.open('GET', '/my-shift_jis.txt', true);
req.responseType = 'arraybuffer';
req.onload = function (event) {
var buffer = req.response;
if (buffer) {
var sjisArray = new Uint8Array(buffer);
var unicodeArray = Encoding.convert(sjisArray, {
to: 'UNICODE',
from: 'SJIS'
});
var unicodeString = Encoding.codeToString(unicodeArray);
console.log(unicodeString);
}
};
req.send(null);
Convert encoding for file using the File APIs:
This sample reads a file using the File APIs,
detects the file's encoding, converts it to Unicode, and displays the result.
<input type="file" id="file">
<div id="encoding"></div>
<textarea id="result" rows="5" cols="80"></textarea>
<script>
function onFileSelect(event) {
var file = event.target.files[0];
var reader = new FileReader();
reader.onload = function(e) {
var codes = new Uint8Array(e.target.result);
var encoding = Encoding.detect(codes);
document.getElementById('encoding').textContent = encoding;
var unicodeString = Encoding.convert(codes, {
to: 'unicode',
from: encoding,
type: 'string'
});
document.getElementById('result').value = unicodeString;
};
reader.readAsArrayBuffer(file);
}
document.getElementById('file').addEventListener('change', onFileSelect, false);
</script>
Demo
Example of the character encoding conversion:
var eucjpArray = [
164, 179, 164, 243, 164, 203, 164, 193, 164, 207, 161,
162, 164, 219, 164, 178, 161, 249, 164, 212, 164, 232
];
var utf8Array = Encoding.convert(eucjpArray, {
to: 'UTF8',
from: 'EUCJP'
});
console.log( utf8Array );
Example of converting a character encoding using automatic detection (Auto detect):
var sjisArray = [
130, 177, 130, 241, 130, 201, 130, 191, 130, 205, 129,
65, 130, 217, 130, 176, 129, 153, 130, 210, 130, 230
];
var unicodeArray = Encoding.convert(sjisArray, {
to: 'UNICODE',
from: 'AUTO'
});
console.log( Encoding.codeToString(unicodeArray) );
Utilities
-
{string} Encoding.codeToString ( {Array.<number>|TypedArray} data )
Joins a character code array to string.
-
{Array.<number>} Encoding.stringToCode ( {string} string )
Splits string to an array of character codes.
Japanese Zenkaku/Hankaku
-
{Array.<number>|string} Encoding.toHankakuCase ( {Array.<number>|string} data )
Convert the zenkaku symbols and alphanumeric characters to the ascii (hankaku) symbols and alphanumeric characters.
-
{Array.<number>|string} Encoding.toZenkakuCase ( {Array.<number>|string} data )
Convert to the zenkaku symbols and alphanumeric characters from the ascii symbols and alphanumeric characters.
-
{Array.<number>|string} Encoding.toHiraganaCase ( {Array.<number>|string} data )
Convert to the zenkaku hiragana from the zenkaku katakana.
-
{Array.<number>|string} Encoding.toKatakanaCase ( {Array.<number>|string} data )
Convert to the zenkaku katakana from the zenkaku hiragana.
-
{Array.<number>|string} Encoding.toHankanaCase ( {Array.<number>|string} data )
Convert to the hankaku katakana from the zenkaku katakana.
-
{Array.<number>|string} Encoding.toZenkanaCase ( {Array.<number>|string} data )
Convert to the zenkaku katakana from the hankaku katakana.
-
{Array.<number>|string} Encoding.toHankakuSpace ({Array.<number>|string} data )
Convert the ideographic (full-width) space (U+3000) to the single space (U+0020).
-
{Array.<number>|string} Encoding.toZenkakuSpace ( {Array.<number>|string} data )
Convert the single space (U+0020) to the ideographic (full-width) space (U+3000).
Demo
License
MIT