iconv-lite
Advanced tools
Comparing version 0.2.11 to 0.4.0-pre
371
index.js
@@ -1,231 +0,214 @@ | ||
var RE_SPACEDASH = /[- ]/g; | ||
// Module exports | ||
var IconvLiteEncoderStream = false, | ||
IconvLiteDecoderStream = false; | ||
var iconv = module.exports = { | ||
toEncoding: function(str, encoding) { | ||
return iconv.getCodec(encoding).toEncoding(str); | ||
// All codecs and aliases are kept here, keyed by encoding name. | ||
// They are lazy loaded in `getCodec` by `/encodings/index.js` to make initial module loading fast. | ||
encodings: null, | ||
codecData: {}, | ||
// Characters emitted in case of error. | ||
defaultCharUnicode: '�', | ||
defaultCharSingleByte: '?', | ||
// Public API | ||
encode: function(str, encoding, options) { | ||
str = ensureString(str); | ||
var encoder = iconv.getCodec(encoding).encoder(options); | ||
var res = encoder.write(str) || new Buffer(); | ||
if (encoder.end) { | ||
var resTrail = encoder.end(); | ||
if (resTrail && resTrail.length > 0) | ||
res = Buffer.concat([res, resTrail]); | ||
} | ||
return res; | ||
}, | ||
fromEncoding: function(buf, encoding) { | ||
return iconv.getCodec(encoding).fromEncoding(buf); | ||
// Decode an encoded Buffer into a JS string using the codec registered for `encoding`. | ||
// `buf` is first normalized by ensureBuffer(); `options` is handed to the codec's decoder(). | ||
// Errors thrown by getCodec() (e.g. an unrecognized encoding) propagate to the caller. | ||
decode: function(buf, encoding, options) { | ||
buf = ensureBuffer(buf); | ||
var decoder = iconv.getCodec(encoding).decoder(options); | ||
// A falsy result from write() is treated as the empty string. | ||
var res = decoder.write(buf) || ""; | ||
if (decoder.end) { | ||
// end() is optional on decoders; when present it may return trailing characters to append. | ||
var resTrail = decoder.end(); | ||
if (resTrail && resTrail.length > 0) | ||
res += resTrail; | ||
} | ||
return res; | ||
}, | ||
// Create an IconvLiteEncoderStream wrapping the encoder of the codec registered for `encoding`. | ||
// `options` is passed both to the codec's encoder() and to the stream constructor. | ||
// Throws when IconvLiteEncoderStream is still `false`, i.e. the stream classes were never defined. | ||
encodeStream: function(encoding, options) { | ||
if (!IconvLiteEncoderStream) | ||
throw new Error("Iconv-lite streams supported only since Node v0.10."); | ||
return new IconvLiteEncoderStream(iconv.getCodec(encoding).encoder(options), options); | ||
}, | ||
// Create an IconvLiteDecoderStream wrapping the decoder of the codec registered for `encoding`. | ||
// `options` is passed both to the codec's decoder() and to the stream constructor. | ||
// Throws when IconvLiteDecoderStream is still `false`, i.e. the stream classes were never defined. | ||
decodeStream: function(encoding, options) { | ||
if (!IconvLiteDecoderStream) | ||
throw new Error("Iconv-lite streams supported only since Node v0.10."); | ||
return new IconvLiteDecoderStream(iconv.getCodec(encoding).decoder(options), options); | ||
}, | ||
encodingExists: function(enc) { | ||
loadEncodings(); | ||
enc = enc.replace(RE_SPACEDASH, "").toLowerCase(); | ||
return (iconv.encodings[enc] !== undefined); | ||
try { | ||
iconv.getCodec(enc); | ||
return true; | ||
} catch (e) { | ||
return false; | ||
} | ||
}, | ||
defaultCharUnicode: '�', | ||
defaultCharSingleByte: '?', | ||
// Report whether the stream API is usable: true once IconvLiteEncoderStream has been assigned | ||
// a constructor, false while it still holds its initial `false` value. | ||
supportsStreams: function() { | ||
return !!IconvLiteEncoderStream; | ||
}, | ||
encodingsLoaded: false, | ||
// Get correct codec for given encoding. | ||
// Search for a codec. | ||
getCodec: function(encoding) { | ||
loadEncodings(); | ||
var enc = encoding || "utf8"; | ||
var codecOptions = undefined; | ||
if (!iconv.encodings) | ||
iconv.encodings = require("./encodings"); // Lazy load all encoding definitions. | ||
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year. | ||
var enc = (''+encoding).toLowerCase().replace(/[^0-9a-z]|:\d{4}$/g, ""); | ||
var codecOptions, saveEnc; | ||
while (1) { | ||
if (getType(enc) === "String") | ||
enc = enc.replace(RE_SPACEDASH, "").toLowerCase(); | ||
var codecData = iconv.codecData[enc]; | ||
if (codecData) | ||
return codecData; | ||
var codec = iconv.encodings[enc]; | ||
var type = getType(codec); | ||
if (type === "String") { | ||
// Link to other encoding. | ||
codecOptions = {originalEncoding: enc}; | ||
enc = codec; | ||
} | ||
else if (type === "Object" && codec.type != undefined) { | ||
// Options for other encoding. | ||
codecOptions = codec; | ||
enc = codec.type; | ||
} | ||
else if (type === "Function") | ||
// Codec itself. | ||
return codec(codecOptions); | ||
else | ||
throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')"); | ||
} | ||
}, | ||
// Define basic encodings | ||
encodings: { | ||
internal: function(options) { | ||
return { | ||
toEncoding: toInternalEncoding, | ||
fromEncoding: fromInternalEncoding, | ||
options: options | ||
}; | ||
}, | ||
utf8: "internal", | ||
ucs2: "internal", | ||
binary: "internal", | ||
ascii: "internal", | ||
base64: "internal", | ||
// Codepage single-byte encodings. | ||
singlebyte: function(options) { | ||
// Prepare chars if needed | ||
if (!options.charsBuf) { | ||
if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256)) | ||
throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)"); | ||
if (options.chars.length === 128) | ||
options.chars = asciiString + options.chars; | ||
options.charsBuf = new Buffer(options.chars, 'ucs2'); | ||
} | ||
if (!options.revCharsBuf) { | ||
options.revCharsBuf = new Buffer(65536); | ||
var defChar = iconv.defaultCharSingleByte.charCodeAt(0); | ||
for (var i = 0; i < options.revCharsBuf.length; i++) | ||
options.revCharsBuf[i] = defChar; | ||
for (var i = 0; i < options.chars.length; i++) | ||
options.revCharsBuf[options.chars.charCodeAt(i)] = i; | ||
} | ||
switch (getType(codec)) { | ||
case "String": // Direct alias to other encoding. | ||
enc = codec; | ||
break; | ||
return { | ||
toEncoding: toSingleByteEncoding, | ||
fromEncoding: fromSingleByteEncoding, | ||
options: options, | ||
}; | ||
}, | ||
case "Object": // Alias with additional options. Can be layered. | ||
if (!codecOptions) { | ||
codecOptions = codec; | ||
saveEnc = enc; | ||
} | ||
else | ||
for (var key in codec) | ||
codecOptions[key] = codec[key]; | ||
// Codepage double-byte encodings. | ||
table: function(options) { | ||
if (!options.table) { | ||
throw new Error("Encoding '" + options.type + "' has incorect 'table' option"); | ||
} | ||
if (!options.revCharsTable) { | ||
var revCharsTable = options.revCharsTable = {}; | ||
for (var i = 0; i <= 0xFFFF; i++) { | ||
revCharsTable[i] = 0; | ||
} | ||
enc = codec.type; | ||
break; | ||
var table = options.table; | ||
for (var key in table) { | ||
revCharsTable[table[key]] = +key; | ||
} | ||
case "Function": // Codec itself. | ||
codecOptions.iconv = iconv; | ||
codecData = codec(codecOptions); | ||
iconv.codecData[saveEnc || enc] = codecData; // Save it to be reused later. | ||
return codecData; | ||
default: | ||
throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')"); | ||
} | ||
return { | ||
toEncoding: toTableEncoding, | ||
fromEncoding: fromTableEncoding, | ||
options: options, | ||
}; | ||
} | ||
} | ||
}, | ||
}; | ||
function toInternalEncoding(str) { | ||
return new Buffer(ensureString(str), this.options.originalEncoding); | ||
// Legacy aliases to convert functions | ||
iconv.toEncoding = iconv.encode; | ||
iconv.fromEncoding = iconv.decode; | ||
// Utilities | ||
// Return the built-in class name of `obj` ("String", "Object", "Function", ...): | ||
// the part of Object.prototype.toString's "[object Xxx]" result between "[object " and "]". | ||
function getType(obj) { | ||
return Object.prototype.toString.call(obj).slice(8, -1); | ||
} | ||
function fromInternalEncoding(buf) { | ||
return ensureBuffer(buf).toString(this.options.originalEncoding); | ||
// Coerce `buf` to a Buffer: a falsy value becomes an empty buffer, an existing Buffer is | ||
// returned unchanged, and anything else is stringified and converted using the "binary" encoding. | ||
function ensureBuffer(buf) { | ||
buf = buf || new Buffer(0); | ||
return (buf instanceof Buffer) ? buf : new Buffer(""+buf, "binary"); | ||
} | ||
function toTableEncoding(str) { | ||
str = ensureString(str); | ||
var strLen = str.length; | ||
var revCharsTable = this.options.revCharsTable; | ||
var newBuf = new Buffer(strLen*2), gbkcode, unicode, | ||
defaultChar = revCharsTable[iconv.defaultCharUnicode.charCodeAt(0)]; | ||
// Coerce `str` to a string: any falsy value (including 0 and false) becomes "", | ||
// a Buffer is decoded as utf8, and anything else is stringified. | ||
function ensureString(str) { | ||
str = str || ""; | ||
return (str instanceof Buffer) ? str.toString('utf8') : (""+str); | ||
} | ||
for (var i = 0, j = 0; i < strLen; i++) { | ||
unicode = str.charCodeAt(i); | ||
if (unicode >> 7) { | ||
gbkcode = revCharsTable[unicode] || defaultChar; | ||
newBuf[j++] = gbkcode >> 8; //high byte; | ||
newBuf[j++] = gbkcode & 0xFF; //low byte | ||
} else {//ascii | ||
newBuf[j++] = unicode; | ||
} | ||
// Streaming support for Node v0.10+ | ||
var nodeVer = process.versions.node.split(".").map(Number); | ||
if (nodeVer[0] > 0 || nodeVer[1] >= 10) { | ||
var Transform = require("stream").Transform; | ||
// == Encoder stream ======================================================= | ||
// Transform stream that feeds incoming strings to `conv`, an encoder object exposing write() | ||
// and, optionally, end(). | ||
// Note: when the caller supplies `options`, that same object is modified (decodeStrings is set on it). | ||
IconvLiteEncoderStream = function IconvLiteEncoderStream(conv, options) { | ||
this.conv = conv; | ||
options = options || {}; | ||
options.decodeStrings = false; // We accept only strings, so we don't need to decode them. | ||
Transform.call(this, options); | ||
} | ||
return newBuf.slice(0, j); | ||
} | ||
function fromTableEncoding(buf) { | ||
buf = ensureBuffer(buf); | ||
var bufLen = buf.length; | ||
var table = this.options.table; | ||
var newBuf = new Buffer(bufLen*2), unicode, gbkcode, | ||
defaultChar = iconv.defaultCharUnicode.charCodeAt(0); | ||
// Inherit from Transform, defining `constructor` so it points back at IconvLiteEncoderStream. | ||
IconvLiteEncoderStream.prototype = Object.create(Transform.prototype, { | ||
constructor: { value: IconvLiteEncoderStream } | ||
}); | ||
for (var i = 0, j = 0; i < bufLen; i++, j+=2) { | ||
gbkcode = buf[i]; | ||
if (gbkcode & 0x80) { | ||
gbkcode = (gbkcode << 8) + buf[++i]; | ||
unicode = table[gbkcode] || defaultChar; | ||
} else { | ||
unicode = gbkcode; | ||
IconvLiteEncoderStream.prototype._transform = function(chunk, encoding, done) { | ||
if (typeof chunk != 'string') | ||
return done(new Error("Iconv encoding stream needs strings as its input.")); | ||
try { | ||
var res = this.conv.write(chunk); | ||
if (res) this.push(res); | ||
done(); | ||
} | ||
newBuf[j] = unicode & 0xFF; //low byte | ||
newBuf[j+1] = unicode >> 8; //high byte | ||
catch (e) { | ||
done(e); | ||
} | ||
} | ||
return newBuf.slice(0, j).toString('ucs2'); | ||
} | ||
function toSingleByteEncoding(str) { | ||
str = ensureString(str); | ||
var buf = new Buffer(str.length); | ||
var revCharsBuf = this.options.revCharsBuf; | ||
for (var i = 0; i < str.length; i++) | ||
buf[i] = revCharsBuf[str.charCodeAt(i)]; | ||
return buf; | ||
} | ||
// Transform's _flush hook: push whatever the encoder's optional end() returns, then signal | ||
// completion through `done`. An exception thrown along the way is reported via done(e). | ||
IconvLiteEncoderStream.prototype._flush = function(done) { | ||
try { | ||
if (this.conv.end) { | ||
var res = this.conv.end(); | ||
if (res) this.push(res); | ||
} | ||
done(); | ||
} | ||
catch (e) { | ||
done(e); | ||
} | ||
} | ||
function fromSingleByteEncoding(buf) { | ||
buf = ensureBuffer(buf); | ||
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations. | ||
var charsBuf = this.options.charsBuf; | ||
var newBuf = new Buffer(buf.length*2); | ||
var idx1 = 0, idx2 = 0; | ||
for (var i = 0, _len = buf.length; i < _len; i++) { | ||
idx1 = buf[i]*2; idx2 = i*2; | ||
newBuf[idx2] = charsBuf[idx1]; | ||
newBuf[idx2+1] = charsBuf[idx1+1]; | ||
// == Decoder stream ======================================================= | ||
// Transform stream that feeds incoming buffers to `conv`, a decoder object exposing write() | ||
// and, optionally, end(). | ||
// Note: when the caller supplies `options`, that same object is modified (encoding is set on it). | ||
IconvLiteDecoderStream = function IconvLiteDecoderStream(conv, options) { | ||
this.conv = conv; | ||
options = options || {}; | ||
options.encoding = this.encoding = 'utf8'; // We output strings. | ||
Transform.call(this, options); | ||
} | ||
return newBuf.toString('ucs2'); | ||
} | ||
// Add aliases to convert functions | ||
iconv.encode = iconv.toEncoding; | ||
iconv.decode = iconv.fromEncoding; | ||
// Inherit from Transform, defining `constructor` so it points back at IconvLiteDecoderStream. | ||
IconvLiteDecoderStream.prototype = Object.create(Transform.prototype, { | ||
constructor: { value: IconvLiteDecoderStream } | ||
}); | ||
// Load other encodings manually from files in /encodings dir. | ||
function loadEncodings() { | ||
if (!iconv.encodingsLoaded) { | ||
[ require('./encodings/singlebyte'), | ||
require('./encodings/gbk'), | ||
require('./encodings/big5') | ||
].forEach(function(encodings) { | ||
for (var key in encodings) | ||
iconv.encodings[key] = encodings[key] | ||
}); | ||
iconv.encodingsLoaded = true; | ||
// Transform's _transform hook: decode one incoming chunk. | ||
// A non-Buffer chunk is rejected by passing an Error to `done`. Otherwise the chunk goes to the | ||
// decoder's write(), and a truthy result is pushed downstream together with this.encoding. | ||
// An exception thrown along the way is reported via done(e). | ||
IconvLiteDecoderStream.prototype._transform = function(chunk, encoding, done) { | ||
if (!Buffer.isBuffer(chunk)) | ||
return done(new Error("Iconv decoding stream needs buffers as its input.")); | ||
try { | ||
var res = this.conv.write(chunk); | ||
if (res) this.push(res, this.encoding); | ||
done(); | ||
} | ||
catch (e) { | ||
done(e); | ||
} | ||
} | ||
} | ||
// Transform's _flush hook: push whatever the decoder's optional end() returns (together with | ||
// this.encoding), then signal completion through `done`. An exception is reported via done(e). | ||
IconvLiteDecoderStream.prototype._flush = function(done) { | ||
try { | ||
if (this.conv.end) { | ||
var res = this.conv.end(); | ||
if (res) this.push(res, this.encoding); | ||
} | ||
done(); | ||
} | ||
catch (e) { | ||
done(e); | ||
} | ||
} | ||
// Utilities | ||
var asciiString = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'+ | ||
' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f'; | ||
var ensureBuffer = function(buf) { | ||
buf = buf || new Buffer(0); | ||
return (buf instanceof Buffer) ? buf : new Buffer(""+buf, "binary"); | ||
} | ||
var ensureString = function(str) { | ||
str = str || ""; | ||
return (str instanceof Buffer) ? str.toString('utf8') : (""+str); | ||
} | ||
var getType = function(obj) { | ||
return Object.prototype.toString.call(obj).slice(8, -1); | ||
} | ||
{ | ||
"name": "iconv-lite", | ||
"description": "Convert character encodings in pure javascript.", | ||
"version": "0.2.11", | ||
"version": "0.4.0-pre", | ||
"license": "MIT", | ||
"keywords": ["iconv", "convert", "charset"], | ||
"keywords": ["iconv", "convert", "charset", "icu"], | ||
"author": "Alexander Shtuchkin <ashtuchkin@gmail.com>", | ||
@@ -24,2 +24,3 @@ "contributors": [ | ||
"homepage": "https://github.com/ashtuchkin/iconv-lite", | ||
"bugs": "https://github.com/ashtuchkin/iconv-lite/issues", | ||
"repository": { | ||
@@ -30,11 +31,15 @@ "type": "git", | ||
"engines": { | ||
"node": ">=0.4.0" | ||
"node": ">=0.8.0" | ||
}, | ||
"scripts": { | ||
"test": "vows --spec" | ||
"test": "mocha --reporter spec --grep ." | ||
}, | ||
"devDependencies": { | ||
"vows": "", | ||
"iconv": ">=1.1" | ||
"mocha": "*", | ||
"request": "*", | ||
"unorm": "*", | ||
"errto": "*", | ||
"async": "*", | ||
"iconv": "2.x" | ||
} | ||
} |
@@ -1,12 +0,12 @@ | ||
iconv-lite - pure javascript character encoding conversion | ||
====================================================================== | ||
## Pure JS character encoding conversion | ||
[![Build Status](https://secure.travis-ci.org/ashtuchkin/iconv-lite.png?branch=master)](http://travis-ci.org/ashtuchkin/iconv-lite) | ||
<!-- [![Build Status](https://secure.travis-ci.org/ashtuchkin/iconv-lite.png?branch=master)](http://travis-ci.org/ashtuchkin/iconv-lite) --> | ||
## Features | ||
* Doesn't need native code compilation. Works on Windows and in sandboxed environments like [Cloud9](http://c9.io). | ||
* Used in popular projects like [Grunt](http://gruntjs.com/), [Nodemailer](http://www.nodemailer.com/), [Yeoman](http://yeoman.io/) and others. | ||
* Faster than [node-iconv](https://github.com/bnoordhuis/node-iconv) (see below for performance comparison). | ||
* Intuitive encode/decode API. | ||
* License: MIT. | ||
* Pure javascript. Doesn't need native code compilation. | ||
* Easy API. | ||
* Works on Windows and in sandboxed environments like [Cloud9](http://c9.io). | ||
* Encoding is much faster than node-iconv (see below for performance comparison). | ||
[![NPM Stats](https://nodei.co/npm/iconv-lite.png?downloads=true)](https://npmjs.org/packages/iconv-lite/) | ||
@@ -17,6 +17,6 @@ ## Usage | ||
// Convert from an encoded buffer to string. | ||
// Convert from an encoded buffer to js string. | ||
str = iconv.decode(buf, 'win1251'); | ||
// Convert from string to an encoded buffer. | ||
// Convert from js string to an encoded buffer. | ||
buf = iconv.encode("Sample input string", 'win1251'); | ||
@@ -27,13 +27,12 @@ | ||
## Supported encodings | ||
* All node.js native encodings: 'utf8', 'ucs2', 'ascii', 'binary', 'base64' | ||
* All widespread single byte encodings: Windows 125x family, ISO-8859 family, | ||
* All node.js native encodings: 'utf8', 'ucs2', 'ascii', 'binary', 'base64' | ||
* All widespread single byte encodings: Windows 125x family, ISO-8859 family, | ||
IBM/DOS codepages, Macintosh family, KOI8 family. | ||
Aliases like 'latin1', 'us-ascii' also supported. | ||
* Multibyte encodings: 'gbk', 'gb2312', 'Big5', 'cp950'. | ||
* Multibyte encodings: 'gbk', 'gb2312', 'Big5', 'cp950'. | ||
Others are easy to add; see the source. Contributions are welcome. | ||
Most encodings are generated from node-iconv. Thank you Ben Noordhuis and iconv authors! | ||
Most encodings are generated automatically from [node-iconv](https://github.com/bnoordhuis/node-iconv). Thank you Ben Noordhuis and iconv authors! | ||
@@ -45,8 +44,8 @@ Not supported yet: EUC family, Shift_JIS. | ||
Comparison with node-iconv module (1000x256kb, on Ubuntu 12.04, Core i5/2.5 GHz, Node v0.8.7). | ||
Comparison with node-iconv module (1000x256kb, on Ubuntu 12.04, Core i5/2.5 GHz, Node v0.10.25). | ||
Note: your results may vary, so please always check on your hardware. | ||
operation iconv@1.2.4 iconv-lite@0.2.4 | ||
operation iconv@2.0.7 iconv-lite@0.2.11 | ||
---------------------------------------------------------- | ||
encode('win1251') ~115 Mb/s ~230 Mb/s | ||
encode('win1251') ~115 Mb/s ~237 Mb/s | ||
decode('win1251') ~95 Mb/s ~130 Mb/s | ||
@@ -72,5 +71,8 @@ | ||
* Support streaming character conversion, something like util.pipe(req, iconv.fromEncodingStream('latin1')). | ||
* Add more encodings. | ||
* Add transliteration (best fit char). | ||
* Add tests and correct support of variable-byte encodings (currently work is delegated to node). | ||
* Support streaming character conversion, something like util.pipe(req, iconv.fromEncodingStream('latin1')). | ||
* Add more encodings. | ||
* Add transliteration (best fit char). | ||
* Add tests and correct support of variable-byte encodings (currently work is delegated to node). | ||
## Adoption | ||
[![NPM](https://nodei.co/npm-dl/iconv-lite.png)](https://nodei.co/npm/iconv-lite/) |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
2465
75
2
234193
6
21