iconv-lite - npm Package Compare versions

lib/bom-handling.js

12

Changelog.md


		# 0.4.9 / 2015-05-24

		* Streamlined BOM handling: strip BOM by default, add BOM when encoding if
		addBOM: true. Added docs to Readme.
		* UTF16 now uses UTF16-LE by default.
		* Fixed minor issue with big5 encoding.
		* Added io.js testing on Travis; updated node-iconv version to test against.
		Now we just skip testing SBCS encodings that node-iconv doesn't support.
		* (internal refactoring) Updated codec interface to use classes.
		* Use strict mode in all files.


		# 0.4.8 / 2015-04-14
		@@ -3,0 +15,0 @@

137

encodings/dbcs-codec.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -6,5 +7,3 @@ // Multibyte codec. In this scheme, a character is represented by 1 or more bytes.

		exports._dbcs = function(options) {
		return new DBCSCodec(options);
		}
		exports._dbcs = DBCSCodec;

		@@ -23,11 +22,11 @@ var UNASSIGNED = -1,
		// Class DBCSCodec reads and initializes mapping tables.
		function DBCSCodec(options) {
		this.options = options;
		if (!options)
		function DBCSCodec(codecOptions, iconv) {
		this.encodingName = codecOptions.encodingName;
		if (!codecOptions)
		throw new Error("DBCS codec is called without the data.")
		if (!options.table)
		throw new Error("Encoding '" + options.encodingName + "' has no data.");
		if (!codecOptions.table)
		throw new Error("Encoding '" + this.encodingName + "' has no data.");

		// Load tables.
		var mappingTable = options.table();
		var mappingTable = codecOptions.table();

		@@ -54,3 +53,3 @@

		this.defaultCharUnicode = options.iconv.defaultCharUnicode;
		this.defaultCharUnicode = iconv.defaultCharUnicode;

		@@ -75,7 +74,10 @@
		var skipEncodeChars = {};
		if (options.encodeSkipVals)
		for (var i = 0; i < options.encodeSkipVals.length; i++) {
		var range = options.encodeSkipVals[i];
		for (var j = range.from; j <= range.to; j++)
		skipEncodeChars[j] = true;
		if (codecOptions.encodeSkipVals)
		for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
		var val = codecOptions.encodeSkipVals[i];
		if (typeof val === 'number')
		skipEncodeChars[val] = true;
		else
		for (var j = val.from; j <= val.to; j++)
		skipEncodeChars[j] = true;
		}
		@@ -87,9 +89,9 @@
		// Add more encoding pairs when needed.
		if (options.encodeAdd) {
		for (var uChar in options.encodeAdd)
		if (Object.prototype.hasOwnProperty.call(options.encodeAdd, uChar))
		this._setEncodeChar(uChar.charCodeAt(0), options.encodeAdd[uChar]);
		if (codecOptions.encodeAdd) {
		for (var uChar in codecOptions.encodeAdd)
		if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar))
		this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]);
		}

		this.defCharSB = this.encodeTable[0][options.iconv.defaultCharSingleByte.charCodeAt(0)];
		this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)];
		if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]['?'];
		@@ -100,4 +102,4 @@ if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0);
		// Load & create GB18030 tables when needed.
		if (typeof options.gb18030 === 'function') {
		this.gb18030 = options.gb18030(); // Load GB18030 ranges.
		if (typeof codecOptions.gb18030 === 'function') {
		this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.

		@@ -124,45 +126,5 @@ // Add GB18030 decode tables.

		// Public interface: create encoder and decoder objects.
		// The methods (write, end) are simple functions to not inhibit optimizations.
		DBCSCodec.prototype.encoder = function encoderDBCS(options) {
		return {
		// Methods
		write: encoderDBCSWrite,
		end: encoderDBCSEnd,
		DBCSCodec.prototype.encoder = DBCSEncoder;
		DBCSCodec.prototype.decoder = DBCSDecoder;

		// Encoder state
		leadSurrogate: -1,
		seqObj: undefined,

		// Static data
		encodeTable: this.encodeTable,
		encodeTableSeq: this.encodeTableSeq,
		defaultCharSingleByte: this.defCharSB,
		gb18030: this.gb18030,

		// Export for testing
		findIdx: findIdx,
		}
		}

		DBCSCodec.prototype.decoder = function decoderDBCS(options) {
		return {
		// Methods
		write: decoderDBCSWrite,
		end: decoderDBCSEnd,

		// Decoder state
		nodeIdx: 0,
		prevBuf: new Buffer(0),

		// Static data
		decodeTables: this.decodeTables,
		decodeTableSeq: this.decodeTableSeq,
		defaultCharUnicode: this.defaultCharUnicode,
		gb18030: this.gb18030,
		}
		}



		// Decoder helpers
		@@ -188,3 +150,3 @@ DBCSCodec.prototype._getDecodeTrieNode = function(addr) {
		else
		throw new Error("Overwrite byte in " + this.options.encodingName + ", addr: " + addr.toString(16));
		throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16));
		}
		@@ -214,3 +176,3 @@ return node;
		else
		throw new Error("Incorrect surrogate pair in " + this.options.encodingName + " at chunk " + chunk[0]);
		throw new Error("Incorrect surrogate pair in " + this.encodingName + " at chunk " + chunk[0]);
		}
		@@ -236,6 +198,6 @@ else if (0x0FF0 < code && code <= 0x0FFF) { // Character sequence (our own encoding used)
		else
		throw new Error("Incorrect type '" + typeof part + "' given in " + this.options.encodingName + " at chunk " + chunk[0]);
		throw new Error("Incorrect type '" + typeof part + "' given in " + this.encodingName + " at chunk " + chunk[0]);
		}
		if (curAddr > 0xFF)
		throw new Error("Incorrect chunk in " + this.options.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
		throw new Error("Incorrect chunk in " + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
		}
		@@ -316,6 +278,17 @@

		// == Actual Encoding ==========================================================
		// == Encoder ==================================================================

		function DBCSEncoder(options, codec) {
		// Encoder state
		this.leadSurrogate = -1;
		this.seqObj = undefined;

		// Static data
		this.encodeTable = codec.encodeTable;
		this.encodeTableSeq = codec.encodeTableSeq;
		this.defaultCharSingleByte = codec.defCharSB;
		this.gb18030 = codec.gb18030;
		}

		function encoderDBCSWrite(str) {
		DBCSEncoder.prototype.write = function(str) {
		var newBuf = new Buffer(str.length * (this.gb18030 ? 4 : 3)),
		@@ -440,3 +413,3 @@ leadSurrogate = this.leadSurrogate,

		function encoderDBCSEnd() {
		DBCSEncoder.prototype.end = function() {
		if (this.leadSurrogate === -1 && this.seqObj === undefined)
		@@ -472,7 +445,21 @@ return; // All clean. Most often case.

		// Export for testing
		DBCSEncoder.prototype.findIdx = findIdx;

		// == Actual Decoding ==========================================================

		// == Decoder ==================================================================

		function decoderDBCSWrite(buf) {
		function DBCSDecoder(options, codec) {
		// Decoder state
		this.nodeIdx = 0;
		this.prevBuf = new Buffer(0);

		// Static data
		this.decodeTables = codec.decodeTables;
		this.decodeTableSeq = codec.decodeTableSeq;
		this.defaultCharUnicode = codec.defaultCharUnicode;
		this.gb18030 = codec.gb18030;
		}

		DBCSDecoder.prototype.write = function(buf) {
		var newBuf = new Buffer(buf.length*2),
		@@ -545,3 +532,3 @@ nodeIdx = this.nodeIdx,

		function decoderDBCSEnd() {
		DBCSDecoder.prototype.end = function() {
		var ret = '';
		@@ -559,3 +546,3 @@
		if (buf.length > 0)
		ret += decoderDBCSWrite.call(this, buf);
		ret += this.write(buf);
		}
		@@ -562,0 +549,0 @@

2

encodings/dbcs-data.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -162,2 +163,3 @@ // Description of supported double byte encodings and aliases.
		table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
		encodeSkipVals: [0xa2cc],
		},
		@@ -164,0 +166,0 @@

1

encodings/index.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -2,0 +3,0 @@ // Update this array if you add/rename/remove files in this directory.

84

encodings/internal.js

		@@ -0,32 +1,37 @@
		"use strict"

		// Export Node.js internal encodings.

		var utf16lebom = new Buffer([0xFF, 0xFE]);

		module.exports = {
		// Encodings
		utf8: { type: "_internal", enc: "utf8" },
		cesu8: { type: "_internal", enc: "utf8" },
		unicode11utf8: { type: "_internal", enc: "utf8" },
		ucs2: { type: "_internal", enc: "ucs2", bom: utf16lebom },
		utf16le:{ type: "_internal", enc: "ucs2", bom: utf16lebom },
		binary: { type: "_internal", enc: "binary" },
		base64: { type: "_internal", enc: "base64" },
		hex: { type: "_internal", enc: "hex" },
		utf8: { type: "_internal", bomAware: true},
		cesu8: "utf8",
		unicode11utf8: "utf8",

		// Codec.
		_internal: function(options) {
		if (!options || !options.enc)
		throw new Error("Internal codec is called without encoding type.")
		ucs2: { type: "_internal", bomAware: true},
		utf16le: "ucs2",

		return {
		encoder: options.enc == "base64" ? encoderBase64 : encoderInternal,
		decoder: decoderInternal,
		binary: { type: "_internal" },
		base64: { type: "_internal" },
		hex: { type: "_internal" },

		enc: options.enc,
		bom: options.bom,
		};
		},
		// Codec.
		_internal: InternalCodec,
		};

		//------------------------------------------------------------------------------

		function InternalCodec(codecOptions) {
		this.enc = codecOptions.encodingName;
		this.bomAware = codecOptions.bomAware;

		if (this.enc === "base64")
		this.encoder = InternalEncoderBase64;
		}

		InternalCodec.prototype.encoder = InternalEncoder;
		InternalCodec.prototype.decoder = InternalDecoder;

		//------------------------------------------------------------------------------

		// We use node.js internal decoder. Its signature is the same as ours.
		@@ -38,34 +43,33 @@ var StringDecoder = require('string_decoder').StringDecoder;

		function decoderInternal() {
		return new StringDecoder(this.enc);

		function InternalDecoder(options, codec) {
		StringDecoder.call(this, codec.enc);
		}

		InternalDecoder.prototype = StringDecoder.prototype;


		//------------------------------------------------------------------------------
		// Encoder is mostly trivial

		function encoderInternal() {
		return {
		write: encodeInternal,
		end: function() {},

		enc: this.enc,
		}
		function InternalEncoder(options, codec) {
		this.enc = codec.enc;
		}

		function encodeInternal(str) {
		InternalEncoder.prototype.write = function(str) {
		return new Buffer(str, this.enc);
		}

		InternalEncoder.prototype.end = function() {
		}


		//------------------------------------------------------------------------------
		// Except base64 encoder, which must keep its state.

		function encoderBase64() {
		return {
		write: encodeBase64Write,
		end: encodeBase64End,

		prevStr: '',
		};
		function InternalEncoderBase64(options, codec) {
		this.prevStr = '';
		}

		function encodeBase64Write(str) {
		InternalEncoderBase64.prototype.write = function(str) {
		str = this.prevStr + str;
		@@ -79,5 +83,5 @@ var completeQuads = str.length - (str.length % 4);

		function encodeBase64End() {
		InternalEncoderBase64.prototype.end = function() {
		return new Buffer(this.prevStr, "base64");
		}

62

encodings/sbcs-codec.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -5,45 +6,39 @@ // Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that

		exports._sbcs = function(options) {
		if (!options)
		exports._sbcs = SBCSCodec;
		function SBCSCodec(codecOptions, iconv) {
		if (!codecOptions)
		throw new Error("SBCS codec is called without the data.")

		// Prepare char buffer for decoding.
		if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
		throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");
		if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256))
		throw new Error("Encoding '"+codecOptions.type+"' has incorrect 'chars' (must be of len 128 or 256)");

		if (options.chars.length === 128) {
		if (codecOptions.chars.length === 128) {
		var asciiString = "";
		for (var i = 0; i < 128; i++)
		asciiString += String.fromCharCode(i);
		options.chars = asciiString + options.chars;
		codecOptions.chars = asciiString + codecOptions.chars;
		}

		var decodeBuf = new Buffer(options.chars, 'ucs2');
		this.decodeBuf = new Buffer(codecOptions.chars, 'ucs2');

		// Encoding buffer.
		var encodeBuf = new Buffer(65536);
		encodeBuf.fill(options.iconv.defaultCharSingleByte.charCodeAt(0));
		encodeBuf.fill(iconv.defaultCharSingleByte.charCodeAt(0));

		for (var i = 0; i < options.chars.length; i++)
		encodeBuf[options.chars.charCodeAt(i)] = i;
		for (var i = 0; i < codecOptions.chars.length; i++)
		encodeBuf[codecOptions.chars.charCodeAt(i)] = i;

		return {
		encoder: encoderSBCS,
		decoder: decoderSBCS,

		encodeBuf: encodeBuf,
		decodeBuf: decodeBuf,
		};
		this.encodeBuf = encodeBuf;
		}

		function encoderSBCS(options) {
		return {
		write: encoderSBCSWrite,
		end: function() {},
		SBCSCodec.prototype.encoder = SBCSEncoder;
		SBCSCodec.prototype.decoder = SBCSDecoder;

		encodeBuf: this.encodeBuf,
		};

		function SBCSEncoder(options, codec) {
		this.encodeBuf = codec.encodeBuf;
		}

		function encoderSBCSWrite(str) {
		SBCSEncoder.prototype.write = function(str) {
		var buf = new Buffer(str.length);
		@@ -56,13 +51,11 @@ for (var i = 0; i < str.length; i++)

		SBCSEncoder.prototype.end = function() {
		}

		function decoderSBCS(options) {
		return {
		write: decoderSBCSWrite,
		end: function() {},

		decodeBuf: this.decodeBuf,
		};

		function SBCSDecoder(options, codec) {
		this.decodeBuf = codec.decodeBuf;
		}

		function decoderSBCSWrite(buf) {
		SBCSDecoder.prototype.write = function(buf) {
		// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
		@@ -72,3 +65,3 @@ var decodeBuf = this.decodeBuf;
		var idx1 = 0, idx2 = 0;
		for (var i = 0, _len = buf.length; i < _len; i++) {
		for (var i = 0; i < buf.length; i++) {
		idx1 = buf[i]*2; idx2 = i*2;
		@@ -80,1 +73,4 @@ newBuf[idx2] = decodeBuf[idx1];
		}

		SBCSDecoder.prototype.end = function() {
		}

1

encodings/sbcs-data-generated.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -2,0 +3,0 @@ // Generated data for sbcs codec. Don't edit manually. Regenerate using generation/gen-sbcs.js script.

1

encodings/sbcs-data.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -2,0 +3,0 @@ // Manually added data to be used by sbcs codec in addition to generated one.

210

encodings/utf16.js

		@@ -0,13 +1,12 @@
		"use strict"


		// == UTF16-BE codec. ==========================================================

		exports.utf16be = function(options) {
		return {
		encoder: utf16beEncoder,
		decoder: utf16beDecoder,
		exports.utf16be = Utf16BECodec;
		function Utf16BECodec() {
		}

		bom: new Buffer([0xFE, 0xFF]),
		};
		};
		Utf16BECodec.prototype.encoder = Utf16BEEncoder;
		Utf16BECodec.prototype.decoder = Utf16BEDecoder;
		Utf16BECodec.prototype.bomAware = true;

		@@ -17,10 +16,6 @@

		function utf16beEncoder(options) {
		return {
		write: utf16beEncoderWrite,
		end: function() {},
		}
		function Utf16BEEncoder() {
		}

		function utf16beEncoderWrite(str) {
		Utf16BEEncoder.prototype.write = function(str) {
		var buf = new Buffer(str, 'ucs2');
		@@ -33,15 +28,13 @@ for (var i = 0; i < buf.length; i += 2) {

		Utf16BEEncoder.prototype.end = function() {
		}


		// -- Decoding

		function utf16beDecoder(options) {
		return {
		write: utf16beDecoderWrite,
		end: function() {},

		overflowByte: -1,
		};
		function Utf16BEDecoder() {
		this.overflowByte = -1;
		}

		function utf16beDecoderWrite(buf) {
		Utf16BEDecoder.prototype.write = function(buf) {
		if (buf.length == 0)
		@@ -69,51 +62,38 @@ return '';

		Utf16BEDecoder.prototype.end = function() {
		}


		// == UTF-16 codec =============================================================
		// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
		// Defaults to UTF-16BE, according to RFC 2781, although it is against some industry practices, see
		// Defaults to UTF-16LE, as it's prevalent and default in Node.
		// http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
		// Decoder default can be changed: iconv.decode(buf, 'utf16', {default: 'utf-16le'});
		// Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});

		// Encoder prepends BOM and uses UTF-16BE.
		// Endianness can also be changed: iconv.encode(str, 'utf16', {use: 'utf-16le'});
		// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).

		exports.utf16 = function(options) {
		return {
		encoder: utf16Encoder,
		decoder: utf16Decoder,
		exports.utf16 = Utf16Codec;
		function Utf16Codec(codecOptions, iconv) {
		this.iconv = iconv;
		}

		getCodec: options.iconv.getCodec,
		};
		};
		Utf16Codec.prototype.encoder = Utf16Encoder;
		Utf16Codec.prototype.decoder = Utf16Decoder;

		// -- Encoding

		function utf16Encoder(options) {
		// -- Encoding (pass-through)

		function Utf16Encoder(options, codec) {
		options = options || {};
		var codec = this.getCodec(options.use || 'utf-16be');
		if (!codec.bom)
		throw new Error("iconv-lite: in UTF-16 encoder, 'use' parameter should be either UTF-16BE or UTF16-LE.");

		return {
		write: utf16EncoderWrite,
		end: utf16EncoderEnd,

		bom: codec.bom,
		internalEncoder: codec.encoder(options),
		};
		if (options.addBOM === undefined)
		options.addBOM = true;
		this.encoder = codec.iconv.getEncoder('utf-16le', options);
		}

		function utf16EncoderWrite(str) {
		var buf = this.internalEncoder.write(str);

		if (this.bom) {
		buf = Buffer.concat([this.bom, buf]);
		this.bom = null;
		}

		return buf;
		Utf16Encoder.prototype.write = function(str) {
		return this.encoder.write(str);
		}

		function utf16EncoderEnd() {
		return this.internalEncoder.end();
		Utf16Encoder.prototype.end = function() {
		return this.encoder.end();
		}
		@@ -124,83 +104,75 @@

		function utf16Decoder(options) {
		return {
		write: utf16DecoderWrite,
		end: utf16DecoderEnd,
		function Utf16Decoder(options, codec) {
		this.decoder = null;
		this.initialBytes = [];
		this.initialBytesLen = 0;

		internalDecoder: null,
		initialBytes: [],
		initialBytesLen: 0,

		options: options || {},
		getCodec: this.getCodec,
		};
		this.options = options || {};
		this.iconv = codec.iconv;
		}

		function utf16DecoderWrite(buf) {
		if (this.internalDecoder)
		return this.internalDecoder.write(buf);
		Utf16Decoder.prototype.write = function(buf) {
		if (!this.decoder) {
		// Codec is not chosen yet. Accumulate initial bytes.
		this.initialBytes.push(buf);
		this.initialBytesLen += buf.length;

		if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
		return '';

		// Codec is not chosen yet. Accumulate initial bytes.
		this.initialBytes.push(buf);
		this.initialBytesLen += buf.length;

		if (this.initialBytesLen < 16) // We need > 2 bytes to use space heuristic (see below)
		return '';
		// We have enough bytes -> detect endianness.
		var buf = Buffer.concat(this.initialBytes),
		encoding = detectEncoding(buf, this.options.defaultEncoding);
		this.decoder = this.iconv.getDecoder(encoding, this.options);
		this.initialBytes.length = this.initialBytesLen = 0;
		}

		// We have enough bytes -> decide endianness.
		return utf16DecoderDecideEndianness.call(this);
		return this.decoder.write(buf);
		}

		function utf16DecoderEnd() {
		if (this.internalDecoder)
		return this.internalDecoder.end();
		Utf16Decoder.prototype.end = function() {
		if (!this.decoder) {
		var buf = Buffer.concat(this.initialBytes),
		encoding = detectEncoding(buf, this.options.defaultEncoding);
		this.decoder = this.iconv.getDecoder(encoding, this.options);

		var res = utf16DecoderDecideEndianness.call(this);
		var trail;
		var res = this.decoder.write(buf),
		trail = this.decoder.end();

		if (this.internalDecoder)
		trail = this.internalDecoder.end();

		return (trail && trail.length > 0) ? (res + trail) : res;
		return trail ? (res + trail) : res;
		}
		return this.decoder.end();
		}

		function utf16DecoderDecideEndianness() {
		var buf = Buffer.concat(this.initialBytes);
		this.initialBytes.length = this.initialBytesLen = 0;
		function detectEncoding(buf, defaultEncoding) {
		var enc = defaultEncoding || 'utf-16le';

		if (buf.length < 2)
		return ''; // Not a valid UTF-16 sequence anyway.
		if (buf.length >= 2) {
		// Check BOM.
		if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
		enc = 'utf-16be';
		else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
		enc = 'utf-16le';
		else {
		// No BOM found. Try to deduce encoding from initial content.
		// Most of the time, the content has spaces (U+0020), but the opposite (U+2000) is very uncommon.
		// So, we count spaces as if it was LE or BE, and decide from that.
		var spacesLE = 0, spacesBE = 0, // Counts of space chars in both positions
		_len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.

		// Default encoding.
		var enc = this.options.default || 'utf-16be';
		for (var i = 0; i < _len; i += 2) {
		if (buf[i] == 0x00 && buf[i+1] == 0x20) spacesBE++;
		if (buf[i] == 0x20 && buf[i+1] == 0x00) spacesLE++;
		}

		// Check BOM.
		if (buf[0] == 0xFE && buf[1] == 0xFF) { // UTF-16BE BOM
		enc = 'utf-16be'; buf = buf.slice(2);
		}
		else if (buf[0] == 0xFF && buf[1] == 0xFE) { // UTF-16LE BOM
		enc = 'utf-16le'; buf = buf.slice(2);
		}
		else {
		// No BOM found. Try to deduce encoding from initial content.
		// Most of the time, the content has spaces (U+0020), but the opposite (U+2000) is very uncommon.
		// So, we count spaces as if it was LE or BE, and decide from that.
		var spaces = [0, 0], // Counts of space chars in both positions
		_len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.

		for (var i = 0; i < _len; i += 2) {
		if (buf[i] == 0x00 && buf[i+1] == 0x20) spaces[0]++;
		if (buf[i] == 0x20 && buf[i+1] == 0x00) spaces[1]++;
		if (spacesBE > 0 && spacesLE == 0)
		enc = 'utf-16be';
		else if (spacesBE == 0 && spacesLE > 0)
		enc = 'utf-16le';
		}

		if (spaces[0] > 0 && spaces[1] == 0)
		enc = 'utf-16be';
		else if (spaces[0] == 0 && spaces[1] > 0)
		enc = 'utf-16le';
		}

		this.internalDecoder = this.getCodec(enc).decoder(this.options);
		return this.internalDecoder.write(buf);
		return enc;
		}

109

encodings/utf7.js

		@@ -0,34 +1,26 @@
		"use strict"

		// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
		// Below is UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
		// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3

		exports.utf7 = function(options) {
		return {
		encoder: function utf7Encoder() {
		return {
		write: utf7EncoderWrite,
		end: function() {},
		exports.utf7 = Utf7Codec;
		exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
		function Utf7Codec(codecOptions, iconv) {
		this.iconv = iconv;
		};

		iconv: options.iconv,
		};
		},
		decoder: function utf7Decoder() {
		return {
		write: utf7DecoderWrite,
		end: utf7DecoderEnd,
		Utf7Codec.prototype.encoder = Utf7Encoder;
		Utf7Codec.prototype.decoder = Utf7Decoder;
		Utf7Codec.prototype.bomAware = true;

		iconv: options.iconv,
		inBase64: false,
		base64Accum: '',
		};
		},
		};
		};

		exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
		// -- Encoding


		var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;

		function utf7EncoderWrite(str) {
		function Utf7Encoder(options, codec) {
		this.iconv = codec.iconv;
		}

		Utf7Encoder.prototype.write = function(str) {
		// Naive implementation.
		@@ -43,3 +35,14 @@ // Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".

		Utf7Encoder.prototype.end = function() {
		}


		// -- Decoding

		function Utf7Decoder(options, codec) {
		this.iconv = codec.iconv;
		this.inBase64 = false;
		this.base64Accum = '';
		}

		var base64Regex = /[A-Za-z0-9\/+]/;
		@@ -54,3 +57,3 @@ var base64Chars = [];

		function utf7DecoderWrite(buf) {
		Utf7Decoder.prototype.write = function(buf) {
		var res = "", lastI = 0,
		@@ -107,3 +110,3 @@ inBase64 = this.inBase64,

		function utf7DecoderEnd() {
		Utf7Decoder.prototype.end = function() {
		var res = "";
		@@ -131,30 +134,22 @@ if (this.inBase64 && this.base64Accum.length > 0)

		exports.utf7imap = function(options) {
		return {
		encoder: function utf7ImapEncoder() {
		return {
		write: utf7ImapEncoderWrite,
		end: utf7ImapEncoderEnd,
		exports.utf7imap = Utf7IMAPCodec;
		function Utf7IMAPCodec(codecOptions, iconv) {
		this.iconv = iconv;
		};

		iconv: options.iconv,
		inBase64: false,
		base64Accum: new Buffer(6),
		base64AccumIdx: 0,
		};
		},
		decoder: function utf7ImapDecoder() {
		return {
		write: utf7ImapDecoderWrite,
		end: utf7ImapDecoderEnd,
		Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
		Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
		Utf7IMAPCodec.prototype.bomAware = true;

		iconv: options.iconv,
		inBase64: false,
		base64Accum: '',
		};
		},
		};
		};

		// -- Encoding

		function utf7ImapEncoderWrite(str) {
		function Utf7IMAPEncoder(options, codec) {
		this.iconv = codec.iconv;
		this.inBase64 = false;
		this.base64Accum = new Buffer(6);
		this.base64AccumIdx = 0;
		}

		Utf7IMAPEncoder.prototype.write = function(str) {
		var inBase64 = this.inBase64,
		@@ -208,3 +203,3 @@ base64Accum = this.base64Accum,

		function utf7ImapEncoderEnd() {
		Utf7IMAPEncoder.prototype.end = function() {
		var buf = new Buffer(10), bufIdx = 0;
		@@ -225,6 +220,14 @@ if (this.inBase64) {

		// -- Decoding

		function Utf7IMAPDecoder(options, codec) {
		this.iconv = codec.iconv;
		this.inBase64 = false;
		this.base64Accum = '';
		}

		var base64IMAPChars = base64Chars.slice();
		base64IMAPChars[','.charCodeAt(0)] = true;

		function utf7ImapDecoderWrite(buf) {
		Utf7IMAPDecoder.prototype.write = function(buf) {
		var res = "", lastI = 0,
		@@ -282,3 +285,3 @@ inBase64 = this.inBase64,

		function utf7ImapDecoderEnd() {
		Utf7IMAPDecoder.prototype.end = function() {
		var res = "";
		@@ -285,0 +288,0 @@ if (this.inBase64 && this.base64Accum.length > 0)

18

lib/extend-node.js

		@@ -0,1 +1,2 @@
		"use strict"

		@@ -17,3 +18,3 @@ // == Extend Node primitives to use iconv-lite =================================
		Buffer.isNativeEncoding = function(enc) {
		return nodeNativeEncodings[enc && enc.toLowerCase()];
		return enc && nodeNativeEncodings[enc.toLowerCase()];
		}
		@@ -27,9 +28,3 @@
		encoding = String(encoding || 'utf8').toLowerCase();
		start = +start || 0;
		if (typeof end !== 'number') end = this.length;

		// Fastpath empty strings
		if (+end == start)
		return '';

		// Use native conversion when possible
		@@ -173,8 +168,5 @@ if (Buffer.isNativeEncoding(encoding))
		Readable.prototype.setEncoding = function setEncoding(enc, options) {
		// Try to use original function when possible.
		if (Buffer.isNativeEncoding(enc))
		return original.ReadableSetEncoding.call(this, enc);

		// Try to use our own decoder, it has the same interface.
		this._readableState.decoder = iconv.getCodec(enc).decoder(options);
		// Use our own decoder, it has the same interface.
		// We cannot use original function as it doesn't handle BOM-s.
		this._readableState.decoder = iconv.getDecoder(enc, options);
		this._readableState.encoding = enc;
		@@ -181,0 +173,0 @@ }

71

lib/index.js

		@@ -0,3 +1,5 @@
		"use strict"

		var iconv = module.exports;
		var bomHandling = require('./bom-handling'),
		iconv = module.exports;

		@@ -16,3 +18,3 @@ // All codecs and aliases are kept here, keyed by encoding name/alias.

		var encoder = iconv.getCodec(encoding).encoder(options);
		var encoder = iconv.getEncoder(encoding, options);

		@@ -35,3 +37,3 @@ var res = encoder.write(str);

		var decoder = iconv.getCodec(encoding).decoder(options);
		var decoder = iconv.getDecoder(encoding, options);

		@@ -41,3 +43,3 @@ var res = decoder.write(buf);

		return (trail && trail.length > 0) ? (res + trail) : res;
		return trail ? (res + trail) : res;
		}
		@@ -68,39 +70,35 @@
		// Traverse iconv.encodings to find actual codec.
		var codecData, codecOptions;
		var codecOptions = {};
		while (true) {
		codecData = iconv._codecDataCache[enc];
		if (codecData)
		return codecData;
		var codec = iconv._codecDataCache[enc];
		if (codec)
		return codec;

		var codec = iconv.encodings[enc];
		var codecDef = iconv.encodings[enc];

		switch (typeof codec) {
		switch (typeof codecDef) {
		case "string": // Direct alias to other encoding.
		enc = codec;
		enc = codecDef;
		break;

		case "object": // Alias with options. Can be layered.
		if (!codecOptions) {
		codecOptions = codec;
		for (var key in codecDef)
		codecOptions[key] = codecDef[key];

		if (!codecOptions.encodingName)
		codecOptions.encodingName = enc;
		}
		else {
		for (var key in codec)
		codecOptions[key] = codec[key];
		}

		enc = codec.type;

		enc = codecDef.type;
		break;

		case "function": // Codec itself.
		if (!codecOptions)
		codecOptions = { encodingName: enc };
		codecOptions.iconv = iconv;
		if (!codecOptions.encodingName)
		codecOptions.encodingName = enc;

		// The codec function must load all tables and return object with .encoder and .decoder methods.
		// It'll be called only once (for each different options object).
		codecData = codec.call(iconv.encodings, codecOptions);
		codec = new codecDef(codecOptions, iconv);

		iconv._codecDataCache[codecOptions.encodingName] = codecData; // Save it to be reused later.
		return codecData;
		iconv._codecDataCache[codecOptions.encodingName] = codec; // Save it to be reused later.
		return codec;

		@@ -113,2 +111,23 @@ default:

		iconv.getEncoder = function getEncoder(encoding, options) {
		var codec = iconv.getCodec(encoding),
		encoder = new codec.encoder(options, codec);

		if (codec.bomAware && options && options.addBOM)
		encoder = new bomHandling.PrependBOM(encoder, options);

		return encoder;
		}

		iconv.getDecoder = function getDecoder(encoding, options) {
		var codec = iconv.getCodec(encoding),
		decoder = new codec.decoder(options, codec);

		if (codec.bomAware && !(options && options.stripBOM === false))
		decoder = new bomHandling.StripBOM(decoder, options);

		return decoder;
		}


		// Load extensions in Node. All of them are omitted in Browserify build via 'browser' field in package.json.
		@@ -115,0 +134,0 @@ var nodeVer = typeof process !== 'undefined' && process.versions && process.versions.node;

6

lib/streams.js

		@@ -0,1 +1,3 @@
		"use strict"

		var Transform = require("stream").Transform;
		@@ -9,7 +11,7 @@
		iconv.encodeStream = function encodeStream(encoding, options) {
		return new IconvLiteEncoderStream(iconv.getCodec(encoding).encoder(options), options);
		return new IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options);
		}

		iconv.decodeStream = function decodeStream(encoding, options) {
		return new IconvLiteDecoderStream(iconv.getCodec(encoding).decoder(options), options);
		return new IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options);
		}
		@@ -16,0 +18,0 @@

4

package.json

		{
		"name": "iconv-lite",
		"description": "Convert character encodings in pure javascript.",
		"version": "0.4.8",
		"version": "0.4.9",
		"license": "MIT",
		@@ -50,4 +50,4 @@
		"istanbul": "*",
		"iconv": "2.1.4"
		"iconv": "2.1"
		}
		}

21

README.md

		@@ -1,5 +0,3 @@
		## Pure JS character encoding conversion
		## Pure JS character encoding conversion [![Build Status](https://travis-ci.org/ashtuchkin/iconv-lite.svg?branch=master)](https://travis-ci.org/ashtuchkin/iconv-lite)

		<!-- [![Build Status](https://secure.travis-ci.org/ashtuchkin/iconv-lite.png?branch=master)](http://travis-ci.org/ashtuchkin/iconv-lite) -->

		* Doesn't need native code compilation. Works on Windows and in sandboxed environments like [Cloud9](http://c9.io).
		@@ -118,8 +116,21 @@ * Used in popular projects like [Express.js (body_parser)](https://github.com/expressjs/body-parser),

		## BOM handling

		## Notes
		* Decoding: BOM is stripped by default, unless overridden by passing `stripBOM: false` in options
		(e.g. `iconv.decode(buf, enc, {stripBOM: false})`).
		A callback may also be given as the `stripBOM` parameter - it'll be called if a BOM character was actually found.
		* Encoding: No BOM is added, unless overridden by the `addBOM: true` option.

		## UTF-16 Encodings

		This library supports UTF-16LE, UTF-16BE and UTF-16 encodings. The first two are straightforward, but UTF-16 tries to be
		smart about endianness in the following ways:
		* Decoding: uses BOM and 'spaces heuristic' to determine input endianness. Default is UTF-16LE, but can be
		overridden with `defaultEncoding: 'utf-16be'` option. Strips BOM unless `stripBOM: false`.
		* Encoding: uses UTF-16LE and writes BOM by default. Use `addBOM: false` to override.

		## Other notes

		When decoding, be sure to supply a Buffer to decode() method, otherwise [bad things usually happen](https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding).
		Untranslatable characters are set to � or ?. No transliteration is currently supported.
		Uses BOM to determine endianness, but doesn't remove it. Use ['strip-bom' module](https://github.com/sindresorhus/strip-bom).
		Node versions 0.10.31 and 0.11.13 are buggy, don't use them (see #65, #77).
		@@ -126,0 +137,0 @@

.travis.yml

Sorry, the diff of this file is not supported yet

iconv-lite - npm Package Compare versions

Improved metrics