Socket
Socket
Sign inDemoInstall

iconv-lite

Package Overview
Dependencies
0
Maintainers
1
Versions
51
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.4.8 to 0.4.9

lib/bom-handling.js

12

Changelog.md
# 0.4.9 / 2015-05-24
* Streamlined BOM handling: strip BOM by default, add BOM when encoding if
addBOM: true. Added docs to Readme.
* UTF16 now uses UTF16-LE by default.
* Fixed minor issue with big5 encoding.
* Added io.js testing on Travis; updated node-iconv version to test against.
Now we just skip testing SBCS encodings that node-iconv doesn't support.
* (internal refactoring) Updated codec interface to use classes.
* Use strict mode in all files.
# 0.4.8 / 2015-04-14

@@ -3,0 +15,0 @@

137

encodings/dbcs-codec.js

@@ -0,1 +1,2 @@

"use strict"

@@ -6,5 +7,3 @@ // Multibyte codec. In this scheme, a character is represented by 1 or more bytes.

exports._dbcs = function(options) {
return new DBCSCodec(options);
}
exports._dbcs = DBCSCodec;

@@ -23,11 +22,11 @@ var UNASSIGNED = -1,

// Class DBCSCodec reads and initializes mapping tables.
function DBCSCodec(options) {
this.options = options;
if (!options)
function DBCSCodec(codecOptions, iconv) {
this.encodingName = codecOptions.encodingName;
if (!codecOptions)
throw new Error("DBCS codec is called without the data.")
if (!options.table)
throw new Error("Encoding '" + options.encodingName + "' has no data.");
if (!codecOptions.table)
throw new Error("Encoding '" + this.encodingName + "' has no data.");
// Load tables.
var mappingTable = options.table();
var mappingTable = codecOptions.table();

@@ -54,3 +53,3 @@

this.defaultCharUnicode = options.iconv.defaultCharUnicode;
this.defaultCharUnicode = iconv.defaultCharUnicode;

@@ -75,7 +74,10 @@

var skipEncodeChars = {};
if (options.encodeSkipVals)
for (var i = 0; i < options.encodeSkipVals.length; i++) {
var range = options.encodeSkipVals[i];
for (var j = range.from; j <= range.to; j++)
skipEncodeChars[j] = true;
if (codecOptions.encodeSkipVals)
for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
var val = codecOptions.encodeSkipVals[i];
if (typeof val === 'number')
skipEncodeChars[val] = true;
else
for (var j = val.from; j <= val.to; j++)
skipEncodeChars[j] = true;
}

@@ -87,9 +89,9 @@

// Add more encoding pairs when needed.
if (options.encodeAdd) {
for (var uChar in options.encodeAdd)
if (Object.prototype.hasOwnProperty.call(options.encodeAdd, uChar))
this._setEncodeChar(uChar.charCodeAt(0), options.encodeAdd[uChar]);
if (codecOptions.encodeAdd) {
for (var uChar in codecOptions.encodeAdd)
if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar))
this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]);
}
this.defCharSB = this.encodeTable[0][options.iconv.defaultCharSingleByte.charCodeAt(0)];
this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)];
if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]['?'];

@@ -100,4 +102,4 @@ if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0);

// Load & create GB18030 tables when needed.
if (typeof options.gb18030 === 'function') {
this.gb18030 = options.gb18030(); // Load GB18030 ranges.
if (typeof codecOptions.gb18030 === 'function') {
this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.

@@ -124,45 +126,5 @@ // Add GB18030 decode tables.

// Public interface: create encoder and decoder objects.
// The methods (write, end) are simple functions to not inhibit optimizations.
DBCSCodec.prototype.encoder = function encoderDBCS(options) {
return {
// Methods
write: encoderDBCSWrite,
end: encoderDBCSEnd,
DBCSCodec.prototype.encoder = DBCSEncoder;
DBCSCodec.prototype.decoder = DBCSDecoder;
// Encoder state
leadSurrogate: -1,
seqObj: undefined,
// Static data
encodeTable: this.encodeTable,
encodeTableSeq: this.encodeTableSeq,
defaultCharSingleByte: this.defCharSB,
gb18030: this.gb18030,
// Export for testing
findIdx: findIdx,
}
}
DBCSCodec.prototype.decoder = function decoderDBCS(options) {
return {
// Methods
write: decoderDBCSWrite,
end: decoderDBCSEnd,
// Decoder state
nodeIdx: 0,
prevBuf: new Buffer(0),
// Static data
decodeTables: this.decodeTables,
decodeTableSeq: this.decodeTableSeq,
defaultCharUnicode: this.defaultCharUnicode,
gb18030: this.gb18030,
}
}
// Decoder helpers

@@ -188,3 +150,3 @@ DBCSCodec.prototype._getDecodeTrieNode = function(addr) {

else
throw new Error("Overwrite byte in " + this.options.encodingName + ", addr: " + addr.toString(16));
throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16));
}

@@ -214,3 +176,3 @@ return node;

else
throw new Error("Incorrect surrogate pair in " + this.options.encodingName + " at chunk " + chunk[0]);
throw new Error("Incorrect surrogate pair in " + this.encodingName + " at chunk " + chunk[0]);
}

@@ -236,6 +198,6 @@ else if (0x0FF0 < code && code <= 0x0FFF) { // Character sequence (our own encoding used)

else
throw new Error("Incorrect type '" + typeof part + "' given in " + this.options.encodingName + " at chunk " + chunk[0]);
throw new Error("Incorrect type '" + typeof part + "' given in " + this.encodingName + " at chunk " + chunk[0]);
}
if (curAddr > 0xFF)
throw new Error("Incorrect chunk in " + this.options.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
throw new Error("Incorrect chunk in " + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
}

@@ -316,6 +278,17 @@

// == Actual Encoding ==========================================================
// == Encoder ==================================================================
/**
 * Streaming encoder instance for a DBCS codec.
 *
 * @param {Object} options - Per-call encoder options (not read by this constructor).
 * @param {Object} codec - The owning codec; its lookup tables are referenced, not copied.
 */
function DBCSEncoder(options, codec) {
    var encoder = this;

    // Mutable state carried between write() calls.
    // -1 / undefined are the "nothing pending" values that end() checks for.
    encoder.leadSurrogate = -1;
    encoder.seqObj = undefined;

    // Lookup data shared with the codec (same object references).
    encoder.encodeTable = codec.encodeTable;
    encoder.encodeTableSeq = codec.encodeTableSeq;
    encoder.defaultCharSingleByte = codec.defCharSB;
    encoder.gb18030 = codec.gb18030;
}
function encoderDBCSWrite(str) {
DBCSEncoder.prototype.write = function(str) {
var newBuf = new Buffer(str.length * (this.gb18030 ? 4 : 3)),

@@ -440,3 +413,3 @@ leadSurrogate = this.leadSurrogate,

function encoderDBCSEnd() {
DBCSEncoder.prototype.end = function() {
if (this.leadSurrogate === -1 && this.seqObj === undefined)

@@ -472,7 +445,21 @@ return; // All clean. Most often case.

// Export for testing
DBCSEncoder.prototype.findIdx = findIdx;
// == Actual Decoding ==========================================================
// == Decoder ==================================================================
function decoderDBCSWrite(buf) {
/**
 * Streaming decoder instance for a DBCS codec.
 *
 * @param {Object} options - Per-call decoder options (not read by this constructor).
 * @param {Object} codec - The owning codec; its decode tables are referenced, not copied.
 */
function DBCSDecoder(options, codec) {
    var decoder = this;

    // Mutable state carried between write() calls.
    // NOTE(review): presumably nodeIdx is the current decode-table position and
    // prevBuf holds bytes left over from the previous chunk - confirm against write().
    decoder.nodeIdx = 0;
    decoder.prevBuf = new Buffer(0);

    // Lookup data shared with the codec (same object references).
    decoder.decodeTables = codec.decodeTables;
    decoder.decodeTableSeq = codec.decodeTableSeq;
    decoder.defaultCharUnicode = codec.defaultCharUnicode;
    decoder.gb18030 = codec.gb18030;
}
DBCSDecoder.prototype.write = function(buf) {
var newBuf = new Buffer(buf.length*2),

@@ -545,3 +532,3 @@ nodeIdx = this.nodeIdx,

function decoderDBCSEnd() {
DBCSDecoder.prototype.end = function() {
var ret = '';

@@ -559,3 +546,3 @@

if (buf.length > 0)
ret += decoderDBCSWrite.call(this, buf);
ret += this.write(buf);
}

@@ -562,0 +549,0 @@

@@ -0,1 +1,2 @@

"use strict"

@@ -162,2 +163,3 @@ // Description of supported double byte encodings and aliases.

table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
encodeSkipVals: [0xa2cc],
},

@@ -164,0 +166,0 @@

@@ -0,1 +1,2 @@

"use strict"

@@ -2,0 +3,0 @@ // Update this array if you add/rename/remove files in this directory.

@@ -0,32 +1,37 @@

"use strict"
// Export Node.js internal encodings.
var utf16lebom = new Buffer([0xFF, 0xFE]);
module.exports = {
// Encodings
utf8: { type: "_internal", enc: "utf8" },
cesu8: { type: "_internal", enc: "utf8" },
unicode11utf8: { type: "_internal", enc: "utf8" },
ucs2: { type: "_internal", enc: "ucs2", bom: utf16lebom },
utf16le:{ type: "_internal", enc: "ucs2", bom: utf16lebom },
binary: { type: "_internal", enc: "binary" },
base64: { type: "_internal", enc: "base64" },
hex: { type: "_internal", enc: "hex" },
utf8: { type: "_internal", bomAware: true},
cesu8: "utf8",
unicode11utf8: "utf8",
// Codec.
_internal: function(options) {
if (!options || !options.enc)
throw new Error("Internal codec is called without encoding type.")
ucs2: { type: "_internal", bomAware: true},
utf16le: "ucs2",
return {
encoder: options.enc == "base64" ? encoderBase64 : encoderInternal,
decoder: decoderInternal,
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
enc: options.enc,
bom: options.bom,
};
},
// Codec.
_internal: InternalCodec,
};
//------------------------------------------------------------------------------
/**
 * Codec wrapper around Node.js built-in encodings.
 *
 * @param {Object} codecOptions - Codec definition; `encodingName` selects the
 *     native encoding and `bomAware` is copied through unchanged.
 */
function InternalCodec(codecOptions) {
    var encodingName = codecOptions.encodingName;

    this.enc = encodingName;
    this.bomAware = codecOptions.bomAware;

    // Every encoding except base64 keeps the encoder found on the prototype.
    if (encodingName !== "base64")
        return;

    // base64 gets its own encoder class, set as an instance property.
    this.encoder = InternalEncoderBase64;
}
InternalCodec.prototype.encoder = InternalEncoder;
InternalCodec.prototype.decoder = InternalDecoder;
//------------------------------------------------------------------------------
// We use node.js internal decoder. Its signature is the same as ours.

@@ -38,34 +43,33 @@ var StringDecoder = require('string_decoder').StringDecoder;

function decoderInternal() {
return new StringDecoder(this.enc);
function InternalDecoder(options, codec) {
StringDecoder.call(this, codec.enc);
}
InternalDecoder.prototype = StringDecoder.prototype;
//------------------------------------------------------------------------------
// Encoder is mostly trivial
function encoderInternal() {
return {
write: encodeInternal,
end: function() {},
enc: this.enc,
}
function InternalEncoder(options, codec) {
this.enc = codec.enc;
}
function encodeInternal(str) {
InternalEncoder.prototype.write = function(str) {
return new Buffer(str, this.enc);
}
InternalEncoder.prototype.end = function() {
}
//------------------------------------------------------------------------------
// Except base64 encoder, which must keep its state.
function encoderBase64() {
return {
write: encodeBase64Write,
end: encodeBase64End,
prevStr: '',
};
function InternalEncoderBase64(options, codec) {
this.prevStr = '';
}
function encodeBase64Write(str) {
InternalEncoderBase64.prototype.write = function(str) {
str = this.prevStr + str;

@@ -79,5 +83,5 @@ var completeQuads = str.length - (str.length % 4);

function encodeBase64End() {
InternalEncoderBase64.prototype.end = function() {
return new Buffer(this.prevStr, "base64");
}

@@ -0,1 +1,2 @@

"use strict"

@@ -5,45 +6,39 @@ // Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that

exports._sbcs = function(options) {
if (!options)
exports._sbcs = SBCSCodec;
function SBCSCodec(codecOptions, iconv) {
if (!codecOptions)
throw new Error("SBCS codec is called without the data.")
// Prepare char buffer for decoding.
if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256))
throw new Error("Encoding '"+codecOptions.type+"' has incorrect 'chars' (must be of len 128 or 256)");
if (options.chars.length === 128) {
if (codecOptions.chars.length === 128) {
var asciiString = "";
for (var i = 0; i < 128; i++)
asciiString += String.fromCharCode(i);
options.chars = asciiString + options.chars;
codecOptions.chars = asciiString + codecOptions.chars;
}
var decodeBuf = new Buffer(options.chars, 'ucs2');
this.decodeBuf = new Buffer(codecOptions.chars, 'ucs2');
// Encoding buffer.
var encodeBuf = new Buffer(65536);
encodeBuf.fill(options.iconv.defaultCharSingleByte.charCodeAt(0));
encodeBuf.fill(iconv.defaultCharSingleByte.charCodeAt(0));
for (var i = 0; i < options.chars.length; i++)
encodeBuf[options.chars.charCodeAt(i)] = i;
for (var i = 0; i < codecOptions.chars.length; i++)
encodeBuf[codecOptions.chars.charCodeAt(i)] = i;
return {
encoder: encoderSBCS,
decoder: decoderSBCS,
encodeBuf: encodeBuf,
decodeBuf: decodeBuf,
};
this.encodeBuf = encodeBuf;
}
function encoderSBCS(options) {
return {
write: encoderSBCSWrite,
end: function() {},
SBCSCodec.prototype.encoder = SBCSEncoder;
SBCSCodec.prototype.decoder = SBCSDecoder;
encodeBuf: this.encodeBuf,
};
function SBCSEncoder(options, codec) {
this.encodeBuf = codec.encodeBuf;
}
function encoderSBCSWrite(str) {
SBCSEncoder.prototype.write = function(str) {
var buf = new Buffer(str.length);

@@ -56,13 +51,11 @@ for (var i = 0; i < str.length; i++)

SBCSEncoder.prototype.end = function() {
}
function decoderSBCS(options) {
return {
write: decoderSBCSWrite,
end: function() {},
decodeBuf: this.decodeBuf,
};
function SBCSDecoder(options, codec) {
this.decodeBuf = codec.decodeBuf;
}
function decoderSBCSWrite(buf) {
SBCSDecoder.prototype.write = function(buf) {
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.

@@ -72,3 +65,3 @@ var decodeBuf = this.decodeBuf;

var idx1 = 0, idx2 = 0;
for (var i = 0, _len = buf.length; i < _len; i++) {
for (var i = 0; i < buf.length; i++) {
idx1 = buf[i]*2; idx2 = i*2;

@@ -80,1 +73,4 @@ newBuf[idx2] = decodeBuf[idx1];

}
SBCSDecoder.prototype.end = function() {
}

@@ -0,1 +1,2 @@

"use strict"

@@ -2,0 +3,0 @@ // Generated data for sbcs codec. Don't edit manually. Regenerate using generation/gen-sbcs.js script.

@@ -0,1 +1,2 @@

"use strict"

@@ -2,0 +3,0 @@ // Manually added data to be used by sbcs codec in addition to generated one.

@@ -0,13 +1,12 @@

"use strict"
// == UTF16-BE codec. ==========================================================
exports.utf16be = function(options) {
return {
encoder: utf16beEncoder,
decoder: utf16beDecoder,
exports.utf16be = Utf16BECodec;
function Utf16BECodec() {
}
bom: new Buffer([0xFE, 0xFF]),
};
};
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
Utf16BECodec.prototype.decoder = Utf16BEDecoder;
Utf16BECodec.prototype.bomAware = true;

@@ -17,10 +16,6 @@

function utf16beEncoder(options) {
return {
write: utf16beEncoderWrite,
end: function() {},
}
function Utf16BEEncoder() {
}
function utf16beEncoderWrite(str) {
Utf16BEEncoder.prototype.write = function(str) {
var buf = new Buffer(str, 'ucs2');

@@ -33,15 +28,13 @@ for (var i = 0; i < buf.length; i += 2) {

Utf16BEEncoder.prototype.end = function() {
}
// -- Decoding
function utf16beDecoder(options) {
return {
write: utf16beDecoderWrite,
end: function() {},
overflowByte: -1,
};
function Utf16BEDecoder() {
this.overflowByte = -1;
}
function utf16beDecoderWrite(buf) {
Utf16BEDecoder.prototype.write = function(buf) {
if (buf.length == 0)

@@ -69,51 +62,38 @@ return '';

Utf16BEDecoder.prototype.end = function() {
}
// == UTF-16 codec =============================================================
// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
// Defaults to UTF-16BE, according to RFC 2781, although it is against some industry practices, see
// Defaults to UTF-16LE, as it's prevalent and default in Node.
// http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
// Decoder default can be changed: iconv.decode(buf, 'utf16', {default: 'utf-16le'});
// Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
// Encoder prepends BOM and uses UTF-16BE.
// Endianness can also be changed: iconv.encode(str, 'utf16', {use: 'utf-16le'});
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
exports.utf16 = function(options) {
return {
encoder: utf16Encoder,
decoder: utf16Decoder,
exports.utf16 = Utf16Codec;
function Utf16Codec(codecOptions, iconv) {
this.iconv = iconv;
}
getCodec: options.iconv.getCodec,
};
};
Utf16Codec.prototype.encoder = Utf16Encoder;
Utf16Codec.prototype.decoder = Utf16Decoder;
// -- Encoding
function utf16Encoder(options) {
// -- Encoding (pass-through)
function Utf16Encoder(options, codec) {
options = options || {};
var codec = this.getCodec(options.use || 'utf-16be');
if (!codec.bom)
throw new Error("iconv-lite: in UTF-16 encoder, 'use' parameter should be either UTF-16BE or UTF16-LE.");
return {
write: utf16EncoderWrite,
end: utf16EncoderEnd,
bom: codec.bom,
internalEncoder: codec.encoder(options),
};
if (options.addBOM === undefined)
options.addBOM = true;
this.encoder = codec.iconv.getEncoder('utf-16le', options);
}
function utf16EncoderWrite(str) {
var buf = this.internalEncoder.write(str);
if (this.bom) {
buf = Buffer.concat([this.bom, buf]);
this.bom = null;
}
return buf;
Utf16Encoder.prototype.write = function(str) {
return this.encoder.write(str);
}
function utf16EncoderEnd() {
return this.internalEncoder.end();
Utf16Encoder.prototype.end = function() {
return this.encoder.end();
}

@@ -124,83 +104,75 @@

function utf16Decoder(options) {
return {
write: utf16DecoderWrite,
end: utf16DecoderEnd,
function Utf16Decoder(options, codec) {
this.decoder = null;
this.initialBytes = [];
this.initialBytesLen = 0;
internalDecoder: null,
initialBytes: [],
initialBytesLen: 0,
options: options || {},
getCodec: this.getCodec,
};
this.options = options || {};
this.iconv = codec.iconv;
}
function utf16DecoderWrite(buf) {
if (this.internalDecoder)
return this.internalDecoder.write(buf);
Utf16Decoder.prototype.write = function(buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBytes.push(buf);
this.initialBytesLen += buf.length;
if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
return '';
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBytes.push(buf);
this.initialBytesLen += buf.length;
if (this.initialBytesLen < 16) // We need > 2 bytes to use space heuristic (see below)
return '';
// We have enough bytes -> detect endianness.
var buf = Buffer.concat(this.initialBytes),
encoding = detectEncoding(buf, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
this.initialBytes.length = this.initialBytesLen = 0;
}
// We have enough bytes -> decide endianness.
return utf16DecoderDecideEndianness.call(this);
return this.decoder.write(buf);
}
function utf16DecoderEnd() {
if (this.internalDecoder)
return this.internalDecoder.end();
Utf16Decoder.prototype.end = function() {
if (!this.decoder) {
var buf = Buffer.concat(this.initialBytes),
encoding = detectEncoding(buf, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
var res = utf16DecoderDecideEndianness.call(this);
var trail;
var res = this.decoder.write(buf),
trail = this.decoder.end();
if (this.internalDecoder)
trail = this.internalDecoder.end();
return (trail && trail.length > 0) ? (res + trail) : res;
return trail ? (res + trail) : res;
}
return this.decoder.end();
}
function utf16DecoderDecideEndianness() {
var buf = Buffer.concat(this.initialBytes);
this.initialBytes.length = this.initialBytesLen = 0;
function detectEncoding(buf, defaultEncoding) {
var enc = defaultEncoding || 'utf-16le';
if (buf.length < 2)
return ''; // Not a valid UTF-16 sequence anyway.
if (buf.length >= 2) {
// Check BOM.
if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
enc = 'utf-16be';
else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
enc = 'utf-16le';
else {
// No BOM found. Try to deduce encoding from initial content.
// Most of the time, the content has spaces (U+0020), but the opposite (U+2000) is very uncommon.
// So, we count spaces as if it was LE or BE, and decide from that.
var spacesLE = 0, spacesBE = 0, // Counts of space chars in both positions
_len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
// Default encoding.
var enc = this.options.default || 'utf-16be';
for (var i = 0; i < _len; i += 2) {
if (buf[i] == 0x00 && buf[i+1] == 0x20) spacesBE++;
if (buf[i] == 0x20 && buf[i+1] == 0x00) spacesLE++;
}
// Check BOM.
if (buf[0] == 0xFE && buf[1] == 0xFF) { // UTF-16BE BOM
enc = 'utf-16be'; buf = buf.slice(2);
}
else if (buf[0] == 0xFF && buf[1] == 0xFE) { // UTF-16LE BOM
enc = 'utf-16le'; buf = buf.slice(2);
}
else {
// No BOM found. Try to deduce encoding from initial content.
// Most of the time, the content has spaces (U+0020), but the opposite (U+2000) is very uncommon.
// So, we count spaces as if it was LE or BE, and decide from that.
var spaces = [0, 0], // Counts of space chars in both positions
_len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
for (var i = 0; i < _len; i += 2) {
if (buf[i] == 0x00 && buf[i+1] == 0x20) spaces[0]++;
if (buf[i] == 0x20 && buf[i+1] == 0x00) spaces[1]++;
if (spacesBE > 0 && spacesLE == 0)
enc = 'utf-16be';
else if (spacesBE == 0 && spacesLE > 0)
enc = 'utf-16le';
}
if (spaces[0] > 0 && spaces[1] == 0)
enc = 'utf-16be';
else if (spaces[0] == 0 && spaces[1] > 0)
enc = 'utf-16le';
}
this.internalDecoder = this.getCodec(enc).decoder(this.options);
return this.internalDecoder.write(buf);
return enc;
}

@@ -0,34 +1,26 @@

"use strict"
// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
// Below is UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
exports.utf7 = function(options) {
return {
encoder: function utf7Encoder() {
return {
write: utf7EncoderWrite,
end: function() {},
exports.utf7 = Utf7Codec;
exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
function Utf7Codec(codecOptions, iconv) {
this.iconv = iconv;
};
iconv: options.iconv,
};
},
decoder: function utf7Decoder() {
return {
write: utf7DecoderWrite,
end: utf7DecoderEnd,
Utf7Codec.prototype.encoder = Utf7Encoder;
Utf7Codec.prototype.decoder = Utf7Decoder;
Utf7Codec.prototype.bomAware = true;
iconv: options.iconv,
inBase64: false,
base64Accum: '',
};
},
};
};
exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
// -- Encoding
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
function utf7EncoderWrite(str) {
function Utf7Encoder(options, codec) {
this.iconv = codec.iconv;
}
Utf7Encoder.prototype.write = function(str) {
// Naive implementation.

@@ -43,3 +35,14 @@ // Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".

Utf7Encoder.prototype.end = function() {
}
// -- Decoding
function Utf7Decoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = '';
}
var base64Regex = /[A-Za-z0-9\/+]/;

@@ -54,3 +57,3 @@ var base64Chars = [];

function utf7DecoderWrite(buf) {
Utf7Decoder.prototype.write = function(buf) {
var res = "", lastI = 0,

@@ -107,3 +110,3 @@ inBase64 = this.inBase64,

function utf7DecoderEnd() {
Utf7Decoder.prototype.end = function() {
var res = "";

@@ -131,30 +134,22 @@ if (this.inBase64 && this.base64Accum.length > 0)

exports.utf7imap = function(options) {
return {
encoder: function utf7ImapEncoder() {
return {
write: utf7ImapEncoderWrite,
end: utf7ImapEncoderEnd,
exports.utf7imap = Utf7IMAPCodec;
function Utf7IMAPCodec(codecOptions, iconv) {
this.iconv = iconv;
};
iconv: options.iconv,
inBase64: false,
base64Accum: new Buffer(6),
base64AccumIdx: 0,
};
},
decoder: function utf7ImapDecoder() {
return {
write: utf7ImapDecoderWrite,
end: utf7ImapDecoderEnd,
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
Utf7IMAPCodec.prototype.bomAware = true;
iconv: options.iconv,
inBase64: false,
base64Accum: '',
};
},
};
};
// -- Encoding
function utf7ImapEncoderWrite(str) {
function Utf7IMAPEncoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = new Buffer(6);
this.base64AccumIdx = 0;
}
Utf7IMAPEncoder.prototype.write = function(str) {
var inBase64 = this.inBase64,

@@ -208,3 +203,3 @@ base64Accum = this.base64Accum,

function utf7ImapEncoderEnd() {
Utf7IMAPEncoder.prototype.end = function() {
var buf = new Buffer(10), bufIdx = 0;

@@ -225,6 +220,14 @@ if (this.inBase64) {

// -- Decoding
function Utf7IMAPDecoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = '';
}
var base64IMAPChars = base64Chars.slice();
base64IMAPChars[','.charCodeAt(0)] = true;
function utf7ImapDecoderWrite(buf) {
Utf7IMAPDecoder.prototype.write = function(buf) {
var res = "", lastI = 0,

@@ -282,3 +285,3 @@ inBase64 = this.inBase64,

function utf7ImapDecoderEnd() {
Utf7IMAPDecoder.prototype.end = function() {
var res = "";

@@ -285,0 +288,0 @@ if (this.inBase64 && this.base64Accum.length > 0)

@@ -0,1 +1,2 @@

"use strict"

@@ -17,3 +18,3 @@ // == Extend Node primitives to use iconv-lite =================================

Buffer.isNativeEncoding = function(enc) {
return nodeNativeEncodings[enc && enc.toLowerCase()];
return enc && nodeNativeEncodings[enc.toLowerCase()];
}

@@ -27,9 +28,3 @@

encoding = String(encoding || 'utf8').toLowerCase();
start = +start || 0;
if (typeof end !== 'number') end = this.length;
// Fastpath empty strings
if (+end == start)
return '';
// Use native conversion when possible

@@ -173,8 +168,5 @@ if (Buffer.isNativeEncoding(encoding))

Readable.prototype.setEncoding = function setEncoding(enc, options) {
// Try to use original function when possible.
if (Buffer.isNativeEncoding(enc))
return original.ReadableSetEncoding.call(this, enc);
// Try to use our own decoder, it has the same interface.
this._readableState.decoder = iconv.getCodec(enc).decoder(options);
// Use our own decoder, it has the same interface.
// We cannot use original function as it doesn't handle BOM-s.
this._readableState.decoder = iconv.getDecoder(enc, options);
this._readableState.encoding = enc;

@@ -181,0 +173,0 @@ }

@@ -0,3 +1,5 @@

"use strict"
var iconv = module.exports;
var bomHandling = require('./bom-handling'),
iconv = module.exports;

@@ -16,3 +18,3 @@ // All codecs and aliases are kept here, keyed by encoding name/alias.

var encoder = iconv.getCodec(encoding).encoder(options);
var encoder = iconv.getEncoder(encoding, options);

@@ -35,3 +37,3 @@ var res = encoder.write(str);

var decoder = iconv.getCodec(encoding).decoder(options);
var decoder = iconv.getDecoder(encoding, options);

@@ -41,3 +43,3 @@ var res = decoder.write(buf);

return (trail && trail.length > 0) ? (res + trail) : res;
return trail ? (res + trail) : res;
}

@@ -68,39 +70,35 @@

// Traverse iconv.encodings to find actual codec.
var codecData, codecOptions;
var codecOptions = {};
while (true) {
codecData = iconv._codecDataCache[enc];
if (codecData)
return codecData;
var codec = iconv._codecDataCache[enc];
if (codec)
return codec;
var codec = iconv.encodings[enc];
var codecDef = iconv.encodings[enc];
switch (typeof codec) {
switch (typeof codecDef) {
case "string": // Direct alias to other encoding.
enc = codec;
enc = codecDef;
break;
case "object": // Alias with options. Can be layered.
if (!codecOptions) {
codecOptions = codec;
for (var key in codecDef)
codecOptions[key] = codecDef[key];
if (!codecOptions.encodingName)
codecOptions.encodingName = enc;
}
else {
for (var key in codec)
codecOptions[key] = codec[key];
}
enc = codec.type;
enc = codecDef.type;
break;
case "function": // Codec itself.
if (!codecOptions)
codecOptions = { encodingName: enc };
codecOptions.iconv = iconv;
if (!codecOptions.encodingName)
codecOptions.encodingName = enc;
// The codec function must load all tables and return object with .encoder and .decoder methods.
// It'll be called only once (for each different options object).
codecData = codec.call(iconv.encodings, codecOptions);
codec = new codecDef(codecOptions, iconv);
iconv._codecDataCache[codecOptions.encodingName] = codecData; // Save it to be reused later.
return codecData;
iconv._codecDataCache[codecOptions.encodingName] = codec; // Save it to be reused later.
return codec;

@@ -113,2 +111,23 @@ default:

/**
 * Create a streaming encoder for the given encoding.
 *
 * @param {string} encoding - Encoding name or alias, resolved via iconv.getCodec.
 * @param {Object} [options] - Passed to the codec's encoder; a truthy `addBOM`
 *     requests a BOM-prepending wrapper.
 * @returns {Object} The codec's encoder, wrapped in bomHandling.PrependBOM when
 *     the codec is BOM-aware and `options.addBOM` is truthy.
 */
iconv.getEncoder = function getEncoder(encoding, options) {
    var codec = iconv.getCodec(encoding);
    var encoder = new codec.encoder(options, codec);

    // Codecs that are not BOM-aware never get a BOM, whatever the caller asked for.
    if (!codec.bomAware)
        return encoder;

    // A BOM is opt-in when encoding.
    if (!options || !options.addBOM)
        return encoder;

    return new bomHandling.PrependBOM(encoder, options);
}
/**
 * Create a streaming decoder for the given encoding.
 *
 * @param {string} encoding - Encoding name or alias, resolved via iconv.getCodec.
 * @param {Object} [options] - Passed to the codec's decoder; `stripBOM: false`
 *     disables BOM stripping.
 * @returns {Object} The codec's decoder, wrapped in bomHandling.StripBOM when
 *     the codec is BOM-aware and `options.stripBOM` is not exactly `false`.
 */
iconv.getDecoder = function getDecoder(encoding, options) {
    var codec = iconv.getCodec(encoding);
    var decoder = new codec.decoder(options, codec);

    // Codecs that are not BOM-aware are returned as-is.
    if (!codec.bomAware)
        return decoder;

    // Stripping is the default; only an explicit `false` turns it off.
    if (options && options.stripBOM === false)
        return decoder;

    return new bomHandling.StripBOM(decoder, options);
}
// Load extensions in Node. All of them are omitted in Browserify build via 'browser' field in package.json.

@@ -115,0 +134,0 @@ var nodeVer = typeof process !== 'undefined' && process.versions && process.versions.node;

@@ -0,1 +1,3 @@

"use strict"
var Transform = require("stream").Transform;

@@ -9,7 +11,7 @@

iconv.encodeStream = function encodeStream(encoding, options) {
return new IconvLiteEncoderStream(iconv.getCodec(encoding).encoder(options), options);
return new IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options);
}
iconv.decodeStream = function decodeStream(encoding, options) {
return new IconvLiteDecoderStream(iconv.getCodec(encoding).decoder(options), options);
return new IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options);
}

@@ -16,0 +18,0 @@

{
"name": "iconv-lite",
"description": "Convert character encodings in pure javascript.",
"version": "0.4.8",
"version": "0.4.9",
"license": "MIT",

@@ -50,4 +50,4 @@

"istanbul": "*",
"iconv": "2.1.4"
"iconv": "2.1"
}
}

@@ -1,5 +0,3 @@

## Pure JS character encoding conversion
## Pure JS character encoding conversion [![Build Status](https://travis-ci.org/ashtuchkin/iconv-lite.svg?branch=master)](https://travis-ci.org/ashtuchkin/iconv-lite)
<!-- [![Build Status](https://secure.travis-ci.org/ashtuchkin/iconv-lite.png?branch=master)](http://travis-ci.org/ashtuchkin/iconv-lite) -->
* Doesn't need native code compilation. Works on Windows and in sandboxed environments like [Cloud9](http://c9.io).

@@ -118,8 +116,21 @@ * Used in popular projects like [Express.js (body_parser)](https://github.com/expressjs/body-parser),

## BOM handling
## Notes
* Decoding: BOM is stripped by default, unless overridden by passing `stripBOM: false` in options
(e.g. `iconv.decode(buf, enc, {stripBOM: false})`).
A callback may also be given as the `stripBOM` parameter - it'll be called if a BOM character was actually found.
* Encoding: No BOM added, unless overridden by `addBOM: true` option.
## UTF-16 Encodings
This library supports UTF-16LE, UTF-16BE and UTF-16 encodings. The first two are straightforward, but UTF-16 tries to be
smart about endianness in the following ways:
* Decoding: uses BOM and 'spaces heuristic' to determine input endianness. Default is UTF-16LE, but can be
overridden with `defaultEncoding: 'utf-16be'` option. Strips BOM unless `stripBOM: false`.
* Encoding: uses UTF-16LE and writes BOM by default. Use `addBOM: false` to override.
## Other notes
When decoding, be sure to supply a Buffer to the decode() method, otherwise [bad things usually happen](https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding).
Untranslatable characters are set to � or ?. No transliteration is currently supported.
Uses BOM to determine endianness, but doesn't remove it. Use ['strip-bom' module](https://github.com/sindresorhus/strip-bom).
Node versions 0.10.31 and 0.11.13 are buggy, don't use them (see #65, #77).

@@ -126,0 +137,0 @@

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc